All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jiang Liu <gerry@linux.alibaba.com>
To: alexander.deucher@amd.com, christian.koenig@amd.com,
	Xinhui.Pan@amd.com, airlied@gmail.com, simona@ffwll.ch,
	sunil.khatri@amd.com, lijo.lazar@amd.com, Hawking.Zhang@amd.com,
	mario.limonciello@amd.com, Jun.Ma2@amd.com,
	xiaogang.chen@amd.com, Kent.Russell@amd.com,
	shuox.liu@linux.alibaba.com, amd-gfx@lists.freedesktop.org
Cc: Jiang Liu <gerry@linux.alibaba.com>
Subject: [RFC PATCH 12/13] drm/amdgpu/nbio: improve the way to manage irq reference count
Date: Wed,  8 Jan 2025 22:00:04 +0800	[thread overview]
Message-ID: <f507147e97da99512a4f304e57697bae9cdce482.1736344725.git.gerry@linux.alibaba.com> (raw)
In-Reply-To: <cover.1736344725.git.gerry@linux.alibaba.com>

Refactor the nbio code to improve how the irq reference count is
managed. Currently amdgpu_irq_get() is called from
ip_blocks[].late_init, while amdgpu_irq_put() is called from
ip_blocks[].hw_fini. This asymmetric design may cause issues under
certain conditions. So:
1) introduce amdgpu_nbio_ras_early_fini() to undo the work done by
   amdgpu_nbio_ras_late_init().
2) remove the amdgpu_irq_put() calls from soc15_common_hw_fini().
3) record, per irq, whether a reference is currently held.

Signed-off-by: Jiang Liu <gerry@linux.alibaba.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 16 +++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c   |  1 +
 drivers/gpu/drm/amd/amdgpu/soc15.c       | 16 ----------------
 5 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
index c75ce91f94ab..b8a69ceec2e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c
@@ -64,13 +64,27 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *
 		r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0);
 		if (r)
 			goto late_fini;
+		amdgpu_ras_set_marker(adev, ras_block, AMDGPU_MARKER_IRQ0);
 		r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0);
 		if (r)
 			goto late_fini;
+		amdgpu_ras_set_marker(adev, ras_block, AMDGPU_MARKER_IRQ1);
 	}
 
 	return 0;
 late_fini:
-	amdgpu_ras_block_early_fini(adev, ras_block);
+	amdgpu_nbio_ras_early_fini(adev, ras_block);
 	return r;
 }
+
+void amdgpu_nbio_ras_early_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+	if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
+		if (amdgpu_ras_test_and_clear_marker(adev, ras_block, AMDGPU_MARKER_IRQ0))
+			amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
+		if (amdgpu_ras_test_and_clear_marker(adev, ras_block, AMDGPU_MARKER_IRQ1))
+			amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
+	}
+
+	amdgpu_ras_block_early_fini(adev, ras_block);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 79c2f807b9fe..e1edf75602c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -117,6 +117,7 @@ struct amdgpu_nbio {
 
 int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev);
 int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
+void amdgpu_nbio_ras_early_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block);
 u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index 97782a73f4b0..6c727b77bb3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -665,6 +665,7 @@ struct amdgpu_nbio_ras nbio_v7_4_ras = {
 		},
 		.hw_ops = &nbio_v7_4_ras_hw_ops,
 		.ras_late_init = amdgpu_nbio_ras_late_init,
+		.ras_early_fini = amdgpu_nbio_ras_early_fini,
 	},
 	.handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring,
 	.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring,
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
index 8a0a63ac88d2..684a38a16247 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c
@@ -703,6 +703,7 @@ struct amdgpu_nbio_ras nbio_v7_9_ras = {
 		},
 		.hw_ops = &nbio_v7_9_ras_hw_ops,
 		.ras_late_init = amdgpu_nbio_ras_late_init,
+		.ras_early_fini = amdgpu_nbio_ras_early_fini,
 	},
 	.handle_ras_controller_intr_no_bifring = nbio_v7_9_handle_ras_controller_intr_no_bifring,
 	.handle_ras_err_event_athub_intr_no_bifring = nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 6fcdeb265a22..1dca7d7c813c 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1299,22 +1299,6 @@ static int soc15_common_hw_fini(struct amdgpu_ip_block *ip_block)
 	if (amdgpu_sriov_vf(adev))
 		xgpu_ai_mailbox_put_irq(adev);
 
-	/*
-	 * For minimal init, late_init is not called, hence RAS irqs are not
-	 * enabled.
-	 */
-	if ((!amdgpu_sriov_vf(adev)) &&
-	    (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) &&
-	    adev->nbio.ras_if &&
-	    amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) {
-		if (adev->nbio.ras &&
-		    adev->nbio.ras->init_ras_controller_interrupt)
-			amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0);
-		if (adev->nbio.ras &&
-		    adev->nbio.ras->init_ras_err_event_athub_interrupt)
-			amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0);
-	}
-
 	return 0;
 }
 
-- 
2.43.5


  parent reply	other threads:[~2025-01-08 14:00 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-08 13:59 Jiang Liu
2025-01-08 13:59 ` [RFC PATCH 01/13] amdgpu: wrong array index to get ip block for PSP Jiang Liu
2025-01-08 15:02   ` Alex Deucher
2025-01-08 13:59 ` [RFC PATCH 02/13] drm/admgpu: add helper functions to track status for ras manager Jiang Liu
2025-01-08 13:59 ` [RFC PATCH 03/13] drm/amdgpu: add a flag to track ras debugfs creation status Jiang Liu
2025-01-08 17:19   ` Mario Limonciello
2025-01-10  3:19     ` Gerry Liu
2025-01-10 16:58       ` Mario Limonciello
2025-01-10 17:16         ` Alex Deucher
2025-01-08 13:59 ` [RFC PATCH 04/13] drm/amdgpu: free all resources on error recovery path of amdgpu_ras_init() Jiang Liu
2025-01-08 13:59 ` [RFC PATCH 05/13] drm/amdgpu: introduce a flag to track refcount held for features Jiang Liu
2025-01-08 13:59 ` [RFC PATCH 06/13] drm/amdgpu: enhance amdgpu_ras_block_late_fini() Jiang Liu
2025-01-08 13:59 ` [RFC PATCH 07/13] drm/amdgpu: enhance amdgpu_ras_pre_fini() to better support SR Jiang Liu
2025-01-08 14:00 ` [RFC PATCH 08/13] drm/admgpu: rename amdgpu_ras_pre_fini() to amdgpu_ras_early_fini() Jiang Liu
2025-01-08 14:00 ` [RFC PATCH 09/13] drm/amdgpu: make IP block state machine works in stack like way Jiang Liu
2025-01-08 17:04   ` Mario Limonciello
2025-01-08 14:00 ` [RFC PATCH 10/13] drm/admgpu: make device state machine work " Jiang Liu
2025-01-08 17:13   ` Mario Limonciello
2025-01-08 14:00 ` [RFC PATCH 11/13] drm/amdgpu/sdma: improve the way to manage irq reference count Jiang Liu
2025-01-08 14:00 ` Jiang Liu [this message]
2025-01-08 14:00 ` [RFC PATCH 13/13] drm/amdgpu/asic: make ip block operations symmetric by .early_fini() Jiang Liu
2025-01-08 14:10 ` Christian König
2025-01-08 16:33 ` Re: Mario Limonciello
2025-01-09  5:34   ` Re: Gerry Liu
2025-01-09 17:10     ` Re: Mario Limonciello
2025-01-13  1:19       ` Re: Gerry Liu
2025-01-13 21:59         ` Re: Mario Limonciello

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f507147e97da99512a4f304e57697bae9cdce482.1736344725.git.gerry@linux.alibaba.com \
    --to=gerry@linux.alibaba.com \
    --cc=Hawking.Zhang@amd.com \
    --cc=Jun.Ma2@amd.com \
    --cc=Kent.Russell@amd.com \
    --cc=Xinhui.Pan@amd.com \
    --cc=airlied@gmail.com \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=lijo.lazar@amd.com \
    --cc=mario.limonciello@amd.com \
    --cc=shuox.liu@linux.alibaba.com \
    --cc=simona@ffwll.ch \
    --cc=sunil.khatri@amd.com \
    --cc=xiaogang.chen@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.