Intel-XE Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
To: Ashutosh Dixit <ashutosh.dixit@intel.com>
Cc: intel-xe@lists.freedesktop.org
Subject: Re: [PATCH 08/17] drm/xe/oa: OA stream initialization (OAG)
Date: Tue, 19 Dec 2023 18:31:45 -0800	[thread overview]
Message-ID: <ZYJSEb52c5Jwx1Wq@unerlige-ril> (raw)
In-Reply-To: <20231208064329.2387604-9-ashutosh.dixit@intel.com>

On Thu, Dec 07, 2023 at 10:43:20PM -0800, Ashutosh Dixit wrote:
>Implement majority of OA stream initialization (as part of OA stream open
>ioctl). OAG buffer is allocated for receiving perf counter samples from
>HW. OAG unit is initialized and the selected OA metric configuration is
>programmed into OAG unit HW using a command/batch buffer.
>
>Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>---
> drivers/gpu/drm/xe/regs/xe_gt_regs.h |   3 +
> drivers/gpu/drm/xe/xe_oa.c           | 397 +++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_oa_types.h     |  82 ++++++
> 3 files changed, 482 insertions(+)
>
>diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>index d318ec0efd7db..1b98b609f7fda 100644
>--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>@@ -156,6 +156,8 @@
>
> #define SQCNT1					XE_REG_MCR(0x8718)
> #define XELPMP_SQCNT1				XE_REG(0x8718)
>+#define   SQCNT1_PMON_ENABLE			REG_BIT(30)
>+#define   SQCNT1_OABPC				REG_BIT(29)
> #define   ENFORCE_RAR				REG_BIT(23)

REG_BIT(29) indentation seems to be off

>
> #define XEHP_SQCM				XE_REG_MCR(0x8724)
>@@ -365,6 +367,7 @@
> #define ROW_CHICKEN				XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
> #define   UGM_BACKUP_MODE			REG_BIT(13)
> #define   MDQ_ARBITRATION_MODE			REG_BIT(12)
>+#define   STALL_DOP_GATING_DISABLE		REG_BIT(5)
> #define   EARLY_EOT_DIS				REG_BIT(1)
>
> #define ROW_CHICKEN2				XE_REG_MCR(0xe4f4, XE_REG_OPTION_MASKED)
>diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
>index 9b0bd58fcbc06..d898610322d50 100644
>--- a/drivers/gpu/drm/xe/xe_oa.c
>+++ b/drivers/gpu/drm/xe/xe_oa.c
>@@ -6,15 +6,26 @@
> #include <linux/nospec.h>
> #include <linux/sysctl.h>
>
>+#include <drm/drm_drv.h>
>+#include <drm/xe_drm.h>
>+
>+#include "instructions/xe_mi_commands.h"
> #include "regs/xe_gt_regs.h"
> #include "regs/xe_oa_regs.h"
> #include "xe_device.h"
> #include "xe_exec_queue.h"
>+#include "xe_bb.h"
>+#include "xe_bo.h"
> #include "xe_gt.h"
>+#include "xe_gt_mcr.h"
> #include "xe_mmio.h"
> #include "xe_oa.h"
>+#include "xe_sched_job.h"
> #include "xe_perf.h"
>
>+#define DEFAULT_POLL_FREQUENCY_HZ 200
>+#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
>+
> static int xe_oa_sample_rate_hard_limit;
> static u32 xe_oa_max_sample_rate = 100000;
>
>@@ -63,6 +74,13 @@ struct xe_oa_open_param {
> 	struct xe_hw_engine *hwe;
> };
>
>+struct xe_oa_config_bo {
>+	struct llist_node node;
>+
>+	struct xe_oa_config *oa_config;
>+	struct xe_bb *bb;
>+};
>+
> #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
>
> static const struct xe_oa_format oa_formats[] = {
>@@ -105,6 +123,381 @@ static void xe_oa_config_put(struct xe_oa_config *oa_config)
> 	kref_put(&oa_config->ref, xe_oa_config_release);
> }
>
>+static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config)
>+{
>+	return kref_get_unless_zero(&oa_config->ref) ? oa_config : NULL;
>+}
>+
>+static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set)
>+{
>+	struct xe_oa_config *oa_config;
>+
>+	rcu_read_lock();
>+	oa_config = idr_find(&oa->metrics_idr, metrics_set);
>+	if (oa_config)
>+		oa_config = xe_oa_config_get(oa_config);
>+	rcu_read_unlock();
>+
>+	return oa_config;
>+}
>+
>+static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
>+{
>+	xe_oa_config_put(oa_bo->oa_config);
>+	xe_bb_free(oa_bo->bb, NULL);
>+	kfree(oa_bo);
>+}
>+
>+static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
>+{
>+	return &stream->hwe->oa_unit->regs;
>+}
>+
>+static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
>+{
>+	struct xe_sched_job *job;
>+	struct dma_fence *fence;
>+	long timeout;
>+	int err = 0;
>+
>+	/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
>+	job = xe_bb_create_job(stream->k_exec_q, bb);
>+	if (IS_ERR(job)) {
>+		err = PTR_ERR(job);
>+		goto exit;
>+	}
>+
>+	xe_sched_job_arm(job);
>+	fence = dma_fence_get(&job->drm.s_fence->finished);
>+	xe_sched_job_push(job);
>+
>+	timeout = dma_fence_wait_timeout(fence, false, HZ);
>+	dma_fence_put(fence);
>+	if (timeout < 0)
>+		err = timeout;
>+	else if (!timeout)
>+		err = -ETIME;
>+exit:
>+	return err;
>+}
>+
>+static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream)
>+{
>+	xe_bo_unpin_map_no_vm(stream->oa_buffer.bo);
>+}
>+
>+static void xe_oa_free_configs(struct xe_oa_stream *stream)
>+{
>+	struct xe_oa_config_bo *oa_bo, *tmp;
>+
>+	xe_oa_config_put(stream->oa_config);
>+	llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
>+		free_oa_config_bo(oa_bo);
>+}
>+
>+#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)
>+
>+static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
>+{
>+	u32 sqcnt1;
>+
>+	/*
>+	 * Wa_1508761755:xehpsdv, dg2
>+	 * Enable thread stall DOP gating and EU DOP gating.
>+	 */
>+	if (stream->oa->xe->info.platform == XE_DG2) {
>+		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
>+					  _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
>+		xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
>+					  _MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
>+	}
>+
>+	/* Make sure we disable noa to save power. */
>+	xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);
>+
>+	sqcnt1 = SQCNT1_PMON_ENABLE |
>+		 (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
>+
>+	/* Reset PMON Enable to save power. */
>+	xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0);
>+}
>+
>+static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
>+{
>+	struct xe_bo *bo;
>+
>+	BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE);
>+	BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M);
>+
>+	bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
>+				  XE_OA_BUFFER_SIZE, ttm_bo_type_kernel,
>+				  XE_BO_CREATE_SYSTEM_BIT | XE_BO_CREATE_GGTT_BIT);
>+	if (IS_ERR(bo))
>+		return PTR_ERR(bo);
>+
>+	stream->oa_buffer.bo = bo;
>+	stream->oa_buffer.vaddr = bo->vmap.vaddr;
>+	return 0;
>+}
>+
>+static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
>+{
>+	u32 i;
>+
>+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
>+
>+	for (i = 0; i < n_regs; i++) {
>+		if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
>+			u32 n_lri = min_t(u32, n_regs - i,
>+					  MI_LOAD_REGISTER_IMM_MAX_REGS);
>+
>+			bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri);
>+		}
>+		bb->cs[bb->len++] = reg_data[i].addr.addr;
>+		bb->cs[bb->len++] = reg_data[i].value;
>+	}
>+}
>+
>+static int num_lri_dwords(int num_regs)
>+{
>+	int count = 0;
>+
>+	if (num_regs > 0) {
>+		count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS);
>+		count += num_regs * 2;
>+	}
>+
>+	return count;
>+}
>+
>+static struct xe_oa_config_bo *
>+__xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config)
>+{
>+	struct xe_oa_config_bo *oa_bo;
>+	size_t config_length;
>+	struct xe_bb *bb;
>+
>+	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
>+	if (!oa_bo)
>+		return ERR_PTR(-ENOMEM);
>+
>+	config_length = num_lri_dwords(oa_config->regs_len);
>+	config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32);
>+
>+	bb = xe_bb_new(stream->gt, config_length, false);
>+	if (IS_ERR(bb))
>+		goto err_free;
>+
>+	write_cs_mi_lri(bb, oa_config->regs, oa_config->regs_len);
>+
>+	oa_bo->bb = bb;
>+	oa_bo->oa_config = xe_oa_config_get(oa_config);
>+	llist_add(&oa_bo->node, &stream->oa_config_bos);
>+
>+	return oa_bo;
>+err_free:
>+	kfree(oa_bo);
>+	return ERR_CAST(bb);
>+}
>+
>+static struct xe_oa_config_bo *xe_oa_alloc_config_buffer(struct xe_oa_stream *stream)
>+{
>+	struct xe_oa_config *oa_config = stream->oa_config;
>+	struct xe_oa_config_bo *oa_bo;
>+
>+	/* Look for the buffer in the already allocated BOs attached to the stream */
>+	llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) {
>+		if (oa_bo->oa_config == oa_config &&
>+		    memcmp(oa_bo->oa_config->uuid, oa_config->uuid,
>+			   sizeof(oa_config->uuid)) == 0)
>+			goto out;
>+	}
>+
>+	oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config);
>+out:
>+	return oa_bo;
>+}
>+
>+static int xe_oa_emit_oa_config(struct xe_oa_stream *stream)
>+{
>+#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
>+	struct xe_oa_config_bo *oa_bo;
>+	int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
>+
>+	oa_bo = xe_oa_alloc_config_buffer(stream);
>+	if (IS_ERR(oa_bo)) {
>+		err = PTR_ERR(oa_bo);
>+		goto exit;
>+	}
>+
>+	err = xe_oa_submit_bb(stream, oa_bo->bb);
>+
>+	/* Additional empirical delay needed for NOA programming after registers are written */
>+	usleep_range(us, 2 * us);

Are we planning to signal a user fence or something similar to indicate
completion? I haven't tracked that aspect much.

The rest is familiar and lgtm,

Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>

Umesh

  reply	other threads:[~2023-12-20  2:31 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-08  6:43 [PATCH v7 00/17] Add OA functionality to Xe Ashutosh Dixit
2023-12-08  6:43 ` [PATCH 01/17] drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types Ashutosh Dixit
2023-12-08  6:43 ` [PATCH 02/17] drm/xe/perf/uapi: Add perf_stream_paranoid sysctl Ashutosh Dixit
2023-12-14  0:57   ` Umesh Nerlige Ramappa
2023-12-19 20:28   ` Dixit, Ashutosh
2024-01-20  2:35     ` Dixit, Ashutosh
2024-01-24 14:10   ` Joel Granados
2023-12-08  6:43 ` [PATCH 03/17] drm/xe/oa/uapi: Add oa_max_sample_rate sysctl Ashutosh Dixit
2023-12-14  0:58   ` Umesh Nerlige Ramappa
2024-01-20  2:36     ` Dixit, Ashutosh
2024-01-24 14:11   ` Joel Granados
2023-12-08  6:43 ` [PATCH 04/17] drm/xe/oa/uapi: Add OA data formats Ashutosh Dixit
2023-12-19  1:11   ` Umesh Nerlige Ramappa
2023-12-19  1:17     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 05/17] drm/xe/oa/uapi: Initialize OA units Ashutosh Dixit
2023-12-19 16:11   ` Umesh Nerlige Ramappa
2024-01-20  2:43     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 06/17] drm/xe/oa/uapi: Add/remove OA config perf ops Ashutosh Dixit
2023-12-19 19:10   ` Umesh Nerlige Ramappa
2024-01-20  2:44     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 07/17] drm/xe/oa/uapi: Define and parse OA stream properties Ashutosh Dixit
2023-12-09 22:53   ` Dixit, Ashutosh
2023-12-19  2:59   ` Dixit, Ashutosh
2023-12-19 16:26     ` Umesh Nerlige Ramappa
2023-12-19 16:29       ` Lionel Landwerlin
2023-12-19 16:40         ` Umesh Nerlige Ramappa
2023-12-19 17:48           ` Lionel Landwerlin
2023-12-19 23:23   ` Umesh Nerlige Ramappa
2024-01-20  2:48     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 08/17] drm/xe/oa: OA stream initialization (OAG) Ashutosh Dixit
2023-12-20  2:31   ` Umesh Nerlige Ramappa [this message]
2024-01-20  2:49     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 09/17] drm/xe/oa/uapi: Expose OA stream fd Ashutosh Dixit
2023-12-20  2:52   ` Umesh Nerlige Ramappa
2024-01-20  2:50     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 10/17] drm/xe/oa/uapi: Read file_operation Ashutosh Dixit
2023-12-20  3:01   ` Umesh Nerlige Ramappa
2024-01-20  2:51     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 11/17] drm/xe/oa: Disable overrun mode for Xe2+ OAG Ashutosh Dixit
2023-12-20  3:05   ` Umesh Nerlige Ramappa
2024-01-20  2:51     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 12/17] drm/xe/oa: Add OAR support Ashutosh Dixit
2023-12-20  4:37   ` Umesh Nerlige Ramappa
2023-12-08  6:43 ` [PATCH 13/17] drm/xe/oa: Add OAC support Ashutosh Dixit
2023-12-20  4:59   ` Umesh Nerlige Ramappa
2024-01-20  2:52     ` FIXME " Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 14/17] drm/xe/oa/uapi: Query OA unit properties Ashutosh Dixit
2023-12-23  0:40   ` Umesh Nerlige Ramappa
2024-01-20  3:10     ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 15/17] drm/xe/oa/uapi: OA buffer mmap Ashutosh Dixit
2023-12-23  2:39   ` Umesh Nerlige Ramappa
2024-01-20  3:11     ` Dixit, Ashutosh
2024-02-06 23:51       ` Umesh Nerlige Ramappa
2024-01-02 11:16   ` Thomas Hellström
2024-01-08 19:50     ` Umesh Nerlige Ramappa
2024-01-09  5:14       ` Dixit, Ashutosh
2023-12-08  6:43 ` [PATCH 16/17] drm/xe/oa: Add MMIO trigger support Ashutosh Dixit
2023-12-20  4:35   ` Umesh Nerlige Ramappa
2023-12-08  6:43 ` [PATCH 17/17] drm/xe/oa: Override GuC RC with OA on PVC Ashutosh Dixit
2023-12-08  9:22 ` ✗ CI.Patch_applied: failure for Add OA functionality to Xe (rev7) Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZYJSEb52c5Jwx1Wq@unerlige-ril \
    --to=umesh.nerlige.ramappa@intel.com \
    --cc=ashutosh.dixit@intel.com \
    --cc=intel-xe@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox