dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
From: Alex Deucher <alexdeucher@gmail.com>
To: dri-devel@lists.freedesktop.org
Cc: "Marek Olšák" <marek.olsak@amd.com>
Subject: [PATCH 69/88] drm/amdgpu: add and implement the GPU reset status query
Date: Tue, 26 May 2015 23:20:08 -0400	[thread overview]
Message-ID: <1432696827-3752-39-git-send-email-alexander.deucher@amd.com> (raw)
In-Reply-To: <1432696827-3752-1-git-send-email-alexander.deucher@amd.com>

From: Marek Olšák <marek.olsak@amd.com>

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  6 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 36 +++++++++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 include/uapi/drm/amdgpu_drm.h              | 11 ++++++++-
 4 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 66b5bd0..ebff89e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1040,7 +1040,7 @@ struct amdgpu_vm_manager {
 
 struct amdgpu_ctx_state {
 	uint64_t flags;
-	uint64_t hangs;
+	uint32_t hangs;
 };
 
 struct amdgpu_ctx {
@@ -1049,6 +1049,7 @@ struct amdgpu_ctx {
 	struct amdgpu_fpriv *fpriv;
 	struct amdgpu_ctx_state state;
 	uint32_t id;
+	unsigned reset_counter;
 };
 
 struct amdgpu_ctx_mgr {
@@ -1897,8 +1898,6 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev,struct amdgpu_fpriv *fpriv,
 							uint32_t *id,uint32_t flags);
 int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
 						  uint32_t id);
-int amdgpu_ctx_query(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
-							uint32_t id,struct amdgpu_ctx_state *state);
 
 void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
 struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
@@ -2006,6 +2005,7 @@ struct amdgpu_device {
 	atomic64_t			vram_vis_usage;
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
+	atomic_t			gpu_reset_counter;
 
 	/* display */
 	struct amdgpu_mode_info		mode_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index bcd332e..6c66ac8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -81,21 +81,36 @@ int amdgpu_ctx_free(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint
 	return -EINVAL;
 }
 
-int amdgpu_ctx_query(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id, struct amdgpu_ctx_state *state)
+static int amdgpu_ctx_query(struct amdgpu_device *adev,
+			    struct amdgpu_fpriv *fpriv, uint32_t id,
+			    union drm_amdgpu_ctx_out *out)
 {
 	struct amdgpu_ctx *ctx;
 	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+	unsigned reset_counter;
 
 	mutex_lock(&mgr->lock);
 	ctx = idr_find(&mgr->ctx_handles, id);
-	if (ctx) {
-		/* state should alter with CS activity */
-		*state = ctx->state;
+	if (!ctx) {
 		mutex_unlock(&mgr->lock);
-		return 0;
+		return -EINVAL;
 	}
+
+	/* TODO: these two are always zero */
+	out->state.flags = ctx->state.flags;
+	out->state.hangs = ctx->state.hangs;
+
+	/* determine if a GPU reset has occurred since the last call */
+	reset_counter = atomic_read(&adev->gpu_reset_counter);
+	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
+	if (ctx->reset_counter == reset_counter)
+		out->state.reset_status = AMDGPU_CTX_NO_RESET;
+	else
+		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
+	ctx->reset_counter = reset_counter;
+
 	mutex_unlock(&mgr->lock);
-	return -EINVAL;
+	return 0;
 }
 
 void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv)
@@ -115,12 +130,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv)
 }
 
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
-							struct drm_file *filp)
+		     struct drm_file *filp)
 {
 	int r;
 	uint32_t id;
 	uint32_t flags;
-	struct amdgpu_ctx_state state;
 
 	union drm_amdgpu_ctx *args = data;
 	struct amdgpu_device *adev = dev->dev_private;
@@ -139,11 +153,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 			r = amdgpu_ctx_free(adev, fpriv, id);
 			break;
 		case AMDGPU_CTX_OP_QUERY_STATE:
-			r = amdgpu_ctx_query(adev, fpriv, id, &state);
-			if (r == 0) {
-				args->out.state.flags = state.flags;
-				args->out.state.hangs = state.hangs;
-			}
+			r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
 			break;
 		default:
 			return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 61cf5ad..3448d9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1781,6 +1781,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	}
 
 	adev->needs_reset = false;
+	atomic_inc(&adev->gpu_reset_counter);
 
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 65da7cd..46580e9 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -149,6 +149,12 @@ union drm_amdgpu_bo_list {
 
 #define AMDGPU_CTX_OP_STATE_RUNNING	1
 
+/* GPU reset status */
+#define AMDGPU_CTX_NO_RESET		0
+#define AMDGPU_CTX_GUILTY_RESET		1 /* this context caused it */
+#define AMDGPU_CTX_INNOCENT_RESET	2 /* some other context caused it */
+#define AMDGPU_CTX_UNKNOWN_RESET	3 /* unknown cause */
+
 struct drm_amdgpu_ctx_in {
 	uint32_t	op;
 	uint32_t	flags;
@@ -164,7 +170,10 @@ union drm_amdgpu_ctx_out {
 
 		struct {
 			uint64_t	flags;
-			uint64_t	hangs;
+			/** Number of resets caused by this context so far. */
+			uint32_t	hangs;
+			/** Reset status since the last call of the ioctl. */
+			uint32_t	reset_status;
 		} state;
 };
 
-- 
1.8.3.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

  parent reply	other threads:[~2015-05-27  3:20 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-27  3:19 [PATCH 00/88] amdgpu driver updates Alex Deucher
2015-05-27  3:19 ` [PATCH 29/88] drm/amdgpu: add amdgpu uapi header (v4) Alex Deucher
2015-06-05 18:57   ` Jerome Glisse
2015-06-09  2:51     ` Alex Deucher
2015-06-09  9:13       ` Marek Olšák
2015-05-27  3:19 ` [PATCH 30/88] drm/amdgpu: add atombios headers Alex Deucher
2015-05-27  3:19 ` [PATCH 31/88] drm/amdgpu: add clearstate_defs.h Alex Deucher
2015-05-27  3:19 ` [PATCH 32/88] drm/amdgpu: add ppsmc.h Alex Deucher
2015-05-27  3:19 ` [PATCH 33/88] drm/amdgpu: add amdgpu_family.h Alex Deucher
2015-05-27  3:19 ` [PATCH 34/88] drm/amdgpu: add amdgpu.h (v2) Alex Deucher
2015-05-27  3:19 ` [PATCH 36/88] drm/amdgpu: fix const warnings in amdgpu_connectors.c Alex Deucher
2015-05-27  3:19 ` [PATCH 37/88] drm/amdgpu: Do not directly dereference pointers to BIOS area Alex Deucher
2015-05-27  3:19 ` [PATCH 40/88] drm/amdgpu: add CIK pci ids Alex Deucher
2015-05-27  3:19 ` [PATCH 41/88] drm/amdgpu: add VI " Alex Deucher
2015-05-27  3:19 ` [PATCH 42/88] drm/amdgpu: drop ttm two ended allocation Alex Deucher
2015-05-27  3:19 ` [PATCH 43/88] drm/amdgpu: fix error check issue in amdgpu_mn_invalidate_range_start Alex Deucher
2015-05-27  3:19 ` [PATCH 44/88] drm/amdgpu: fix bug occurs when bo_list is NULL Alex Deucher
2015-05-27  3:19 ` [PATCH 45/88] drm/amdgpu: let bo_list handler start from 1 Alex Deucher
2015-05-27  3:19 ` [PATCH 46/88] drm/amdgpu: fix error handling in cz_dpm_hw_fini/cz_dpm_suspend Alex Deucher
2015-05-27  3:19 ` [PATCH 47/88] drm/amdgpu: memset gds_info struct in info ioctl Alex Deucher
2015-05-27  3:19 ` [PATCH 48/88] drm/amdgpu fix amdgpu.dpm=0 (v2) Alex Deucher
2015-05-27  3:19 ` [PATCH 49/88] drm/amdgpu: remove AMDGPU_GEM_CREATE_CPU_GTT_UC Alex Deucher
2015-06-06 17:08   ` Oded Gabbay
2015-06-06 19:09     ` Christian König
2015-06-06 19:32       ` Oded Gabbay
2015-06-12 15:47   ` Emil Velikov
2015-06-12 16:41     ` Bridgman, John
2015-06-12 16:57       ` Emil Velikov
2015-05-27  3:19 ` [PATCH 50/88] drm/amdgpu: fix userptr BO unpin bug (v2) Alex Deucher
2015-05-27  3:19 ` [PATCH 51/88] drm/amdgpu: fix userptr lockup Alex Deucher
2015-05-27  3:19 ` [PATCH 52/88] drm/amdgpu: remove unsafe context releasing Alex Deucher
2015-05-27  3:19 ` [PATCH 53/88] drm/amdgpu: make the CTX ioctl thread-safe Alex Deucher
2015-05-27  3:19 ` [PATCH 54/88] drm/amdgpu: allow unaligned memory access (v2) Alex Deucher
2015-05-27  3:19 ` [PATCH 55/88] drm/amdgpu: add ctx_id to the WAIT_CS IOCTL (v4) Alex Deucher
2015-05-27  3:19 ` [PATCH 56/88] drm/amdgpu: check context id for context switching (v2) Alex Deucher
2015-05-27  3:19 ` [PATCH 57/88] drm/amdgpu: add flags for amdgpu_ib structure Alex Deucher
2015-05-27  3:19 ` [PATCH 58/88] drm/amdgpu: add CE preamble flag v3 Alex Deucher
2015-05-27  3:19 ` [PATCH 59/88] drm/amdgpu: always emit GDS switch Alex Deucher
2015-05-27  3:19 ` [PATCH 60/88] drm/amdgpu: cleanup HDP flush handling Alex Deucher
2015-05-27  3:20 ` [PATCH 61/88] drm/amdgpu: fix dereference before check Alex Deucher
2015-05-27  3:20 ` [PATCH 62/88] drm/amdgpu: fix context switch Alex Deucher
2015-05-27  3:20 ` [PATCH 63/88] drm/amdgpu: expose the max virtual address Alex Deucher
2015-05-27  3:20 ` [PATCH 64/88] drm/amdgpu: do necessary NULL check Alex Deucher
2015-05-27  3:20 ` [PATCH 65/88] drm/amdgpu: switch to amdgpu folder for firmware files v2 Alex Deucher
2015-05-27  3:20 ` [PATCH 66/88] drm/amdgpu: rewording some left radeons Alex Deucher
2015-05-27  3:20 ` [PATCH 67/88] drm/amdgpu: add new bonaire pci id Alex Deucher
2015-05-27  3:20 ` [PATCH 68/88] drm/amdgpu: add some new tonga pci ids Alex Deucher
2015-05-27  3:20 ` Alex Deucher [this message]
2015-05-27  3:20 ` [PATCH 70/88] drm/amdgpu: take the mode_config mutex when handling hpds Alex Deucher
2015-05-27  3:20 ` [PATCH 71/88] drm/amdgpu: make some DP parameters const Alex Deucher
2015-05-27  3:20 ` [PATCH 72/88] drm/amdgpu: simplify DPCD debug output Alex Deucher
2015-05-27  3:20 ` [PATCH 73/88] drm/amdgpu: retry dcpd fetch Alex Deucher
2015-05-27  3:20 ` [PATCH 74/88] drm/amdgpu: fix VM_CONTEXT*_PAGE_TABLE_END_ADDR handling Alex Deucher
2015-05-27  3:20 ` [PATCH 75/88] drm/amdgpu: enforce AMDGPU_GEM_CREATE_NO_CPU_ACCESS Alex Deucher
2015-05-27  3:20 ` [PATCH 76/88] drm/amdgpu: validate amdgpu_vm_bo_map parameters Alex Deucher
2015-05-27  3:20 ` [PATCH 77/88] drm/amdgpu: actually use the VM map parameters Alex Deucher
2015-05-27  3:20 ` [PATCH 78/88] drm/amdgpu: don't set unused tiling flags Alex Deucher
2015-05-27  3:20 ` [PATCH 79/88] drm/amdgpu: rework " Alex Deucher
2015-05-27  3:20 ` [PATCH 80/88] drm/amdgpu: remove unused TRACE_SYSTEM_STRING define Alex Deucher
2015-05-27  3:20 ` [PATCH 81/88] drm/amdgpu: recalculate VCE firmware BO size Alex Deucher
2015-05-27  3:20 ` [PATCH 82/88] drm/amdgpu: implement VCE two instances support Alex Deucher
2015-05-27  3:20 ` [PATCH 83/88] drm/amdgpu: enable uvd dpm and powergating Alex Deucher
2015-05-27  3:20 ` [PATCH 84/88] drm/amdgpu: port fault_reserve_notify changes from radeon Alex Deucher
2015-05-27  3:20 ` [PATCH 85/88] drm/amdgpu: drop AMDGPU_FENCE_SIGNALED_SEQ Alex Deucher
2015-05-27  3:20 ` [PATCH 86/88] drm/amdgpu: drop allocation flag masks Alex Deucher
2015-05-27  3:20 ` [PATCH 87/88] drm/amdgpu: rename amdgpu_ip_funcs to amd_ip_funcs (v2) Alex Deucher
2015-05-27  3:20 ` [PATCH 88/88] drm/amdgpu: implement the allocation range (v2) Alex Deucher

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1432696827-3752-39-git-send-email-alexander.deucher@amd.com \
    --to=alexdeucher@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=marek.olsak@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).