From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id E74A6C52D6F for ; Tue, 20 Aug 2024 02:11:46 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9039410E496; Tue, 20 Aug 2024 02:11:46 +0000 (UTC) Authentication-Results: gabe.freedesktop.org; dkim=pass (2048-bit key; unprotected) header.d=intel.com header.i=@intel.com header.b="EsiAHkqY"; dkim-atps=neutral Received: from mgamail.intel.com (mgamail.intel.com [198.175.65.12]) by gabe.freedesktop.org (Postfix) with ESMTPS id 2198410E361 for ; Tue, 20 Aug 2024 02:11:45 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1724119905; x=1755655905; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=efsFy02Pi8QpTrBdRUsWh64HXfoKb36CqXnKyBAnB+g=; b=EsiAHkqYj4y1Ga5NyylWS7npUg0qw9jQgUWBtbMUlXyQjYebqB9OER/M Osiwy0KfWy/ZYvEvPp3bNdW8gCg/6c2zX43IK8V2VtpUJyWHlPYUklq4i ls5CY1EWaL29IksK0+ujoya23XDiGHVS/VhMcym6CLMIFOLXqVa2I7LgO hByyio53VUoxSfe/2w6gYpPLkEF/E36LFnnAdUXZ+aqTCaJUZZdJ/crUO +NKv+8cvSwE4+nGio0QPlBZ+6KHqf8JqABiyS6I4e4QE/FtoLgbm9DVfK UkkIlHsGYt+dwE3PS1XYudzMgV3B7v/IoA3mA9S/dfQtJD1Jgk7X6/St1 g==; X-CSE-ConnectionGUID: urXB0BGmQZ6s1oNnw6FsQQ== X-CSE-MsgGUID: jCtrOE8/Rpyqbk1T9ZcnsA== X-IronPort-AV: E=McAfee;i="6700,10204,11169"; a="33780406" X-IronPort-AV: E=Sophos;i="6.10,160,1719903600"; d="scan'208";a="33780406" Received: from orviesa007.jf.intel.com ([10.64.159.147]) by orvoesa104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 19 Aug 2024 19:11:44 -0700 X-CSE-ConnectionGUID: fSoSBddwQeOvBYE5u+Aiaw== X-CSE-MsgGUID: D2UMrqSKSdejTyWYcFW46Q== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="6.10,160,1719903600"; d="scan'208";a="61126104" Received: from guc-pnp-dev-box-1.fm.intel.com ([10.1.27.7]) by orviesa007.jf.intel.com with ESMTP; 19 Aug 2024 19:11:45 -0700 From: Zhanjun Dong To: intel-xe@lists.freedesktop.org Cc: Zhanjun Dong Subject: [PATCH v16 3/7] drm/xe/guc: Add capture size check in GuC log buffer Date: Mon, 19 Aug 2024 19:11:38 -0700 Message-Id: <20240820021142.436536-4-zhanjun.dong@intel.com> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20240820021142.436536-1-zhanjun.dong@intel.com> References: <20240820021142.436536-1-zhanjun.dong@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: intel-xe@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Intel Xe graphics driver List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-xe-bounces@lists.freedesktop.org Sender: "Intel-xe" Capture-nodes generated by GuC are placed in the GuC capture ring buffer which is a sub-region of the larger Guc-Log-buffer. Add capture output size check before allocating the shared buffer. Signed-off-by: Zhanjun Dong --- drivers/gpu/drm/xe/abi/guc_log_abi.h | 20 ++++++ drivers/gpu/drm/xe/xe_guc_capture.c | 91 +++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_guc_log.c | 65 ++++++++++++++++++++ drivers/gpu/drm/xe/xe_guc_log.h | 7 ++- 4 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/xe/abi/guc_log_abi.h diff --git a/drivers/gpu/drm/xe/abi/guc_log_abi.h b/drivers/gpu/drm/xe/abi/guc_log_abi.h new file mode 100644 index 000000000000..10db4ffaa17f --- /dev/null +++ b/drivers/gpu/drm/xe/abi/guc_log_abi.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _ABI_GUC_LOG_ABI_H +#define _ABI_GUC_LOG_ABI_H + +#include + +/* GuC logging buffer types */ +enum guc_log_buffer_type { + GUC_LOG_BUFFER_CRASH_DUMP, + GUC_LOG_BUFFER_DEBUG, + GUC_LOG_BUFFER_CAPTURE, +}; + +#define GUC_LOG_BUFFER_TYPE_MAX 3 + +#endif diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index 78670ebf43d6..98395a171078 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -22,6 +22,7 @@ #include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_guc.h" +#include "xe_guc_ads.h" #include "xe_guc_capture.h" #include "xe_guc_capture_types.h" #include "xe_guc_ct.h" @@ -638,6 +639,93 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc) return PAGE_ALIGN(total_size); } +static int guc_capture_output_size_est(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + int capture_size = 0; + size_t tmp = 0; + + if (!guc->capture) + return -ENODEV; + + /* + * If every single engine-instance suffered a failure in quick succession but + * were all unrelated, then a burst of multiple error-capture events would dump + * registers for every one engine instance, one at a time. In this case, GuC + * would even dump the global-registers repeatedly. + * + * For each engine instance, there would be 1 x guc_state_capture_group_t output + * followed by 3 x guc_state_capture_t lists. The latter is how the register + * dumps are split across different register types (where the '3' are global vs class + * vs instance). + */ + for_each_hw_engine(hwe, gt, id) { + enum guc_capture_list_class_type capture_class; + + capture_class = xe_engine_class_to_guc_capture_class(hwe->class); + capture_size += sizeof(struct guc_state_capture_group_header_t) + + (3 * sizeof(struct guc_state_capture_header_t)); + + if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_GLOBAL, + 0, &tmp, true)) + capture_size += tmp; + if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS, + capture_class, &tmp, true)) + capture_size += tmp; + /* Estimate steering register size for rcs/ccs */ + if (capture_class == GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE) { + int total = guc_capture_get_steer_reg_num(guc_to_xe(guc)) * + XE_MAX_DSS_FUSE_BITS; + + capture_size += PAGE_ALIGN((sizeof(struct guc_debug_capture_list)) + + (total * sizeof(struct guc_mmio_reg))); + } + if (!guc_capture_getlistsize(guc, 0, GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE, + capture_class, &tmp, true)) + capture_size += tmp; + } + + return capture_size; +} + +/* + * Add on a 3x multiplier to allow for multiple back-to-back captures occurring + * before the Xe can read the data out and process it + */ +#define GUC_CAPTURE_OVERBUFFER_MULTIPLIER 3 + +static void check_guc_capture_size(struct xe_guc *guc) +{ + int capture_size = guc_capture_output_size_est(guc); + int spare_size = capture_size * GUC_CAPTURE_OVERBUFFER_MULTIPLIER; + u32 buffer_size = xe_guc_log_section_size_capture(&guc->log); + + /* + * NOTE: capture_size is much smaller than the capture region + * allocation (DG2: <80K vs 1MB). + * Additionally, its based on space needed to fit all engines getting + * reset at once within the same G2H handler task slot. This is very + * unlikely. However, if GuC really does run out of space for whatever + * reason, we will see an separate warning message when processing the + * G2H event capture-notification, search for: + * xe_guc_STATE_CAPTURE_EVENT_STATUS_NOSPACE. + */ + if (capture_size < 0) + xe_gt_dbg(guc_to_gt(guc), + "Failed to calculate error state capture buffer minimum size: %d!\n", + capture_size); + if (capture_size > buffer_size) + xe_gt_dbg(guc_to_gt(guc), "Error state capture buffer maybe small: %d < %d\n", + buffer_size, capture_size); + else if (spare_size > buffer_size) + xe_gt_dbg(guc_to_gt(guc), + "Error state capture buffer lacks spare size: %d < %d (min = %d)\n", + buffer_size, spare_size, capture_size); +} + /* * xe_guc_capture_steered_list_init - Init steering register list * @guc: The GuC object @@ -657,11 +745,12 @@ int xe_guc_capture_steered_list_init(struct xe_guc *guc) * the end of the pre-populated render list. */ guc_capture_alloc_steered_lists(guc); + check_guc_capture_size(guc); return 0; } -/** +/* * xe_guc_capture_init - Init for GuC register capture * @guc: The GuC object * diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index a37ee3419428..d6b5ac522b6c 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -96,3 +96,68 @@ int xe_guc_log_init(struct xe_guc_log *log) return 0; } + +static u32 xe_guc_log_section_size_crash(struct xe_guc_log *log) +{ + return CRASH_BUFFER_SIZE; +} + +static u32 xe_guc_log_section_size_debug(struct xe_guc_log *log) +{ + return DEBUG_BUFFER_SIZE; +} + +/** + * xe_guc_log_section_size_capture - Get capture buffer size within log sections. + * @log: The log object. + * + * This function will return the capture buffer size within log sections. + * + * Return: capture buffer size. + */ +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log) +{ + return CAPTURE_BUFFER_SIZE; +} + +/** + * xe_guc_get_log_buffer_size - Get log buffer size for a type. + * @log: The log object. + * @type: The log buffer type + * + * Return: buffer size. + */ +u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type) +{ + switch (type) { + case GUC_LOG_BUFFER_CRASH_DUMP: + return xe_guc_log_section_size_crash(log); + case GUC_LOG_BUFFER_DEBUG: + return xe_guc_log_section_size_debug(log); + case GUC_LOG_BUFFER_CAPTURE: + return xe_guc_log_section_size_capture(log); + } + return 0; +} + +/** + * xe_guc_get_log_buffer_offset - Get offset in log buffer for a type. + * @log: The log object. + * @type: The log buffer type + * + * This function will return the offset in the log buffer for a type. + * Return: buffer offset. + */ +u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type) +{ + enum guc_log_buffer_type i; + u32 offset = PAGE_SIZE;/* for the log_buffer_states */ + + for (i = GUC_LOG_BUFFER_CRASH_DUMP; i < GUC_LOG_BUFFER_TYPE_MAX; ++i) { + if (i == type) + break; + offset += xe_guc_get_log_buffer_size(log, i); + } + + return offset; +} diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h index 2d25ab28b4b3..87ecd1814854 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.h +++ b/drivers/gpu/drm/xe/xe_guc_log.h @@ -7,6 +7,7 @@ #define _XE_GUC_LOG_H_ #include "xe_guc_log_types.h" +#include "abi/guc_log_abi.h" struct drm_printer; @@ -17,7 +18,7 @@ struct drm_printer; #else #define CRASH_BUFFER_SIZE SZ_8K #define DEBUG_BUFFER_SIZE SZ_64K -#define CAPTURE_BUFFER_SIZE SZ_16K +#define CAPTURE_BUFFER_SIZE SZ_1M #endif /* * While we're using plain log level in i915, GuC controls are much more... @@ -45,4 +46,8 @@ xe_guc_log_get_level(struct xe_guc_log *log) return log->level; } +u32 xe_guc_log_section_size_capture(struct xe_guc_log *log); +u32 xe_guc_get_log_buffer_size(struct xe_guc_log *log, enum guc_log_buffer_type type); +u32 xe_guc_get_log_buffer_offset(struct xe_guc_log *log, enum guc_log_buffer_type type); + #endif -- 2.34.1