All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luben Tuikov <luben.tuikov@amd.com>
To: jiadong.zhu@amd.com, amd-gfx@lists.freedesktop.org
Cc: Ray.Huang@amd.com
Subject: Re: [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3)
Date: Tue, 13 Sep 2022 11:23:47 -0400	[thread overview]
Message-ID: <822e2f83-e911-3356-e171-86c9dc7c1235@amd.com> (raw)
In-Reply-To: <20220909015022.557099-2-jiadong.zhu@amd.com>

Inlined:

On 2022-09-08 21:50, jiadong.zhu@amd.com wrote:
> From: "Jiadong.Zhu" <Jiadong.Zhu@amd.com>
> 
> Set ring functions with software ring callbacks
> on gfx9.
> 
> The software ring could be tested by debugfs_test_ib
> case.
> 
> v2: set sw_ring 2 to enable software ring by default.
> v3: remove the parameter for software ring enablement.
> 
> Signed-off-by: Jiadong.Zhu <Jiadong.Zhu@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h      |   1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |  16 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |   3 +-
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c    | 116 +++++++++++++++++++++--
>  5 files changed, 128 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 96d058c4cd4b..525df0b4d55f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -207,6 +207,7 @@ extern bool amdgpu_ignore_bad_page_threshold;
>  extern struct amdgpu_watchdog_timer amdgpu_watchdog_timer;
>  extern int amdgpu_async_gfx_ring;
>  extern int amdgpu_mcbp;
> +extern int amdgpu_sw_ring;
>  extern int amdgpu_discovery;
>  extern int amdgpu_mes;
>  extern int amdgpu_mes_kiq;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 0de8e3cd0f1c..5eec82014f0a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -348,6 +348,8 @@ struct amdgpu_gfx {
>  
>  	bool				is_poweron;
>  
> +	/*software ring*/

Isn't it more aesthetic to put spaces around? Like this:
/* software ring */
?

Please run your patches through scripts/checkpatch.pl.

> +	unsigned						num_sw_gfx_rings;
>  	struct amdgpu_ring_mux			muxer;
>  };
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 13db99d653bd..5b70a2c36d81 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -33,6 +33,7 @@
>  
>  #include <drm/amdgpu_drm.h>
>  #include "amdgpu.h"
> +#include "amdgpu_sw_ring.h"
>  #include "atom.h"
>  
>  /*
> @@ -121,6 +122,11 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
>  {
>  	uint32_t count;
>  
> +	if (ring->is_sw_ring) {
> +		amdgpu_sw_ring_commit(ring);
> +		return;
> +	}
> +
>  	/* We pad to match fetch size */
>  	count = ring->funcs->align_mask + 1 -
>  		(ring->wptr & ring->funcs->align_mask);
> @@ -183,6 +189,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>  	u32 *num_sched;
>  	u32 hw_ip;
>  
> +	if (adev->gfx.num_sw_gfx_rings > 0 && ring->is_sw_ring) {
> +		return amdgpu_sw_ring_init(adev, ring, max_dw, irq_src, irq_type,
> +			hw_prio, sched_score);
> +	}
> +
>  	/* Set the hw submission limit higher for KIQ because
>  	 * it's used for a number of gfx/compute tasks by both
>  	 * KFD and KGD which may have outstanding fences and
> @@ -343,7 +354,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
>   */
>  void amdgpu_ring_fini(struct amdgpu_ring *ring)
>  {
> -
> +	if (ring->is_sw_ring) {
> +		amdgpu_sw_ring_fini(ring);
> +		return;
> +	}
>  	/* Not to finish a ring which is not initialized */
>  	if (!(ring->adev) ||
>  	    (!ring->is_mes_queue && !(ring->adev->rings[ring->idx])))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index fe33a683bfba..ba6d8c753f7e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -38,7 +38,8 @@ struct amdgpu_vm;
>  /* max number of rings */
>  #define AMDGPU_MAX_RINGS		28
>  #define AMDGPU_MAX_HWIP_RINGS		8
> -#define AMDGPU_MAX_GFX_RINGS		2
> +/*2 software ring and 1 real ring*/
> +#define AMDGPU_MAX_GFX_RINGS		3
>  #define AMDGPU_MAX_COMPUTE_RINGS	8
>  #define AMDGPU_MAX_VCE_RINGS		3
>  #define AMDGPU_MAX_UVD_ENC_RINGS	2
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index 5349ca4d19e3..774e44e1074a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -47,6 +47,7 @@
>  
>  #include "amdgpu_ras.h"
>  
> +#include "amdgpu_sw_ring.h"
>  #include "gfx_v9_4.h"
>  #include "gfx_v9_0.h"
>  #include "gfx_v9_4_2.h"
> @@ -55,7 +56,8 @@
>  #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
>  #include "asic_reg/gc/gc_9_0_default.h"
>  
> -#define GFX9_NUM_GFX_RINGS     1
> +#define GFX9_NUM_GFX_RINGS     3
> +#define GFX9_NUM_SW_GFX_RINGS  2
>  #define GFX9_MEC_HPD_SIZE 4096
>  #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
>  #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
> @@ -2270,6 +2272,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
>  static int gfx_v9_0_sw_init(void *handle)
>  {
>  	int i, j, k, r, ring_id;
> +	unsigned int hw_prio;
>  	struct amdgpu_ring *ring;
>  	struct amdgpu_kiq *kiq;
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -2356,13 +2359,40 @@ static int gfx_v9_0_sw_init(void *handle)
>  			sprintf(ring->name, "gfx_%d", i);
>  		ring->use_doorbell = true;
>  		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
> +		ring->is_sw_ring = (adev->gfx.num_sw_gfx_rings > 1) && (i > 0);
> +
> +		if (adev->gfx.num_sw_gfx_rings > 1 && i == 2)
> +			hw_prio = AMDGPU_RING_PRIO_2;
> +		else
> +			hw_prio = AMDGPU_RING_PRIO_DEFAULT;
> +		if (adev->gfx.num_sw_gfx_rings > 0 && i == 0)
> +			ring->no_scheduler = true;
> +
>  		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
>  				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
> -				     AMDGPU_RING_PRIO_DEFAULT, NULL);
> +				     hw_prio, NULL);
>  		if (r)
>  			return r;
> +
> +		if (ring->is_sw_ring)
> +			ring->wptr = 0;
>  	}
>  
> +	/*init the muxer and add sw rings */
> +	if (adev->gfx.num_sw_gfx_rings > 0) {
> +		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0]);
> +		if (r) {
> +			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
> +			return r;
> +		}
> +		for (i = 1; i < adev->gfx.num_gfx_rings; i++) {
> +			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, &adev->gfx.gfx_ring[i]);
> +			if (r) {
> +				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
> +				return r;
> +			}
> +		}
> +	}
>  	/* set up the compute queues - allocate horizontally across pipes */
>  	ring_id = 0;
>  	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
> @@ -2413,6 +2443,9 @@ static int gfx_v9_0_sw_fini(void *handle)
>  	int i;
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  
> +	if (adev->gfx.num_sw_gfx_rings > 0)
> +		amdgpu_ring_mux_fini(&adev->gfx.muxer);
> +
>  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
> @@ -4709,8 +4742,9 @@ static int gfx_v9_0_early_init(void *handle)
>  	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
>  	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
>  		adev->gfx.num_gfx_rings = 0;
> -	else
> -		adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> +
> +	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
> +	adev->gfx.num_sw_gfx_rings = GFX9_NUM_SW_GFX_RINGS;
>  	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
>  					  AMDGPU_MAX_COMPUTE_RINGS);
>  	gfx_v9_0_set_kiq_pm4_funcs(adev);
> @@ -5877,7 +5911,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
>  
>  	switch (me_id) {
>  	case 0:
> -		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
> +		if (adev->gfx.num_sw_gfx_rings > 1) {
> +			for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
> +				amdgpu_fence_process(&adev->gfx.gfx_ring[i]);
> +		} else
> +			amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
>  		break;
>  	case 1:
>  	case 2:
> @@ -6882,6 +6920,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
>  	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
>  };
>  
> +
> +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
> +	.type = AMDGPU_RING_TYPE_GFX,
> +	.align_mask = 0xff,
> +	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
> +	.support_64bit_ptrs = true,
> +	.secure_submission_supported = true,
> +	.vmhub = AMDGPU_GFXHUB_0,
> +	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
> +	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
> +	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
> +	.emit_frame_size = /* totally 242 maximum if 16 IBs */
> +		5 +  /* COND_EXEC */
> +		7 +  /* PIPELINE_SYNC */
> +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
> +		2 + /* VM_FLUSH */
> +		8 +  /* FENCE for VM_FLUSH */
> +		20 + /* GDS switch */
> +		4 + /* double SWITCH_BUFFER,
> +		     * the first COND_EXEC jump to the place just
> +		     * prior to this double SWITCH_BUFFER
> +		     */
> +		5 + /* COND_EXEC */
> +		7 +	 /*	HDP_flush */
> +		4 +	 /*	VGT_flush */
> +		14 + /*	CE_META */
> +		31 + /*	DE_META */
> +		3 + /* CNTX_CTRL */
> +		5 + /* HDP_INVL */
> +		8 + 8 + /* FENCE x2 */
> +		2 + /* SWITCH_BUFFER */
> +		7, /* gfx_v9_0_emit_mem_sync */
> +	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
> +	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
> +	.emit_fence = gfx_v9_0_ring_emit_fence,
> +	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
> +	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
> +	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
> +	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
> +	.test_ring = gfx_v9_0_ring_test_ring,
> +	.test_ib = gfx_v9_0_ring_test_ib,
> +	.insert_nop = amdgpu_ring_insert_nop,
> +	.pad_ib = amdgpu_ring_generic_pad_ib,
> +	.emit_switch_buffer = gfx_v9_ring_emit_sb,
> +	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
> +	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
> +	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
> +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
> +	.emit_wreg = gfx_v9_0_ring_emit_wreg,
> +	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
> +	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
> +	.soft_recovery = gfx_v9_0_ring_soft_recovery,
> +	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
> +};
> +
>  static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
>  	.type = AMDGPU_RING_TYPE_COMPUTE,
>  	.align_mask = 0xff,
> @@ -6956,9 +7050,15 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
>  
>  	adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
>  
> -	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> -		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
> -
> +	if (adev->gfx.num_sw_gfx_rings > 0) {
> +		//first one is the real ring
> +		adev->gfx.gfx_ring[0].funcs = &gfx_v9_0_ring_funcs_gfx;
> +		for (i = 1; i <= adev->gfx.num_sw_gfx_rings; i++)
> +			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
> +	} else {
> +		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> +			adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
> +	}
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++)
>  		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
>  }

Regards,
-- 
Luben

  parent reply	other threads:[~2022-09-13 15:23 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-09  1:50 [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) jiadong.zhu
2022-09-09  1:50 ` [PATCH 2/4] drm/amdgpu: Add software ring callbacks for gfx9(v3) jiadong.zhu
2022-09-09 16:46   ` Andrey Grodzovsky
2022-09-13 15:23   ` Luben Tuikov [this message]
2022-09-09  1:50 ` [PATCH 3/4] drm/amdgpu: Modify unmap_queue format for gfx9(v2) jiadong.zhu
2022-09-09 16:48   ` Andrey Grodzovsky
2022-09-09  1:50 ` [PATCH 4/4] drm/amdgpu: Implement OS triggered MCBP(v2) jiadong.zhu
2022-09-09 17:02   ` Andrey Grodzovsky
2022-09-13  1:32     ` Zhu, Jiadong
2022-09-13 15:47   ` Luben Tuikov
2022-09-09 14:24 ` [PATCH 1/4] drm/amdgpu: Introduce gfx software ring(v3) Christian König
2022-09-12 14:31   ` Luben Tuikov
2022-09-09 16:45 ` Andrey Grodzovsky
2022-09-12 10:20   ` Christian König
2022-09-12 13:22     ` Andrey Grodzovsky
2022-09-12 13:27       ` Christian König
2022-09-12 15:34         ` Andrey Grodzovsky
2022-09-12 15:51           ` Liu, Shaoyun
2022-09-12 16:23             ` Christian König
2022-09-12 16:22           ` Christian König
2022-09-12 16:45             ` Andrey Grodzovsky
2022-09-13  1:44               ` Zhu, Jiadong
2022-09-13  2:00                 ` Andrey Grodzovsky
2022-09-13  7:25                   ` Christian König
2022-09-13 15:07                     ` Andrey Grodzovsky
2022-09-13 15:12 ` Luben Tuikov
2022-09-14  2:34   ` Zhu, Jiadong
2022-09-14  4:30     ` Luben Tuikov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=822e2f83-e911-3356-e171-86c9dc7c1235@amd.com \
    --to=luben.tuikov@amd.com \
    --cc=Ray.Huang@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=jiadong.zhu@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.