From: Ankit Navik <ankit.p.navik@intel.com>
To: intel-gfx@lists.freedesktop.org
Cc: ankit.p.navik@intel.com
Subject: [PATCH v4 2/3] drm/i915: set optimum eu/slice/sub-slice configuration based on load type
Date: Thu, 14 Mar 2019 14:06:54 +0530 [thread overview]
Message-ID: <1552552615-6703-3-git-send-email-ankit.p.navik@intel.com> (raw)
In-Reply-To: <1552552615-6703-1-git-send-email-ankit.p.navik@intel.com>
From: Praveen Diwakar <praveen.diwakar@intel.com>
This patch will select optimum eu/slice/sub-slice configuration based on
type of load (low, medium, high) as input.
Based on our readings and experiments we have predefined set of optimum
configuration for each platform(CHT, KBL).
i915_gem_context_set_load_type will select optimum configuration from
pre-defined optimum configuration table(opt_config).
It also introduce flag update_render_config which can set by any governor.
v2:
* Move static optimum_config to device init time.
* Rename function to appropriate name, fix data types and patch ordering.
* Rename prev_load_type to pending_load_type. (Tvrtko Ursulin)
v3:
* Add safe guard check in i915_gem_context_set_load_type.
* Rename struct from optimum_config to i915_sseu_optimum_config to
avoid namespace clashes.
* Reduces memcpy for space efficient.
* Rebase.
* Improved commit message. (Tvrtko Ursulin)
v4:
* Move optimum config table to file scope. (Tvrtko Ursulin)
Cc: Kedar J Karanje <kedar.j.karanje@intel.com>
Cc: Yogesh Marathe <yogesh.marathe@intel.com>
Signed-off-by: Praveen Diwakar <praveen.diwakar@intel.com>
Signed-off-by: Aravindan Muthukumar <aravindan.muthukumar@intel.com>
Signed-off-by: Ankit Navik <ankit.p.navik@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 5 ++++
drivers/gpu/drm/i915/i915_gem_context.c | 20 ++++++++++++++
drivers/gpu/drm/i915/i915_gem_context.h | 34 +++++++++++++++++++++++
drivers/gpu/drm/i915/intel_device_info.c | 47 ++++++++++++++++++++++++++++++--
drivers/gpu/drm/i915/intel_lrc.c | 45 +++++++++++++++++++++++++++++-
5 files changed, 148 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d048..97cb36b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1593,6 +1593,11 @@ struct drm_i915_private {
struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
int num_fence_regs; /* 8 on pre-965, 16 otherwise */
+ /* optimal slice/subslice/EU configration state */
+ struct i915_sseu_optimum_config *opt_config;
+
+ int predictive_load_enable;
+
unsigned int fsb_freq, mem_freq, is_ddr3;
unsigned int skl_preferred_vco_freq;
unsigned int max_cdclk_freq;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index a5876fe..8f16ef1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -454,10 +454,30 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
trace_i915_context_create(ctx);
atomic_set(&ctx->req_cnt, 0);
+ ctx->slice_cnt = hweight8(RUNTIME_INFO(dev_priv)->sseu.slice_mask);
+ ctx->subslice_cnt = hweight8(
+ RUNTIME_INFO(dev_priv)->sseu.subslice_mask[0]);
+ ctx->eu_cnt = RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice;
return ctx;
}
+
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+ enum gem_load_type type)
+{
+ struct drm_i915_private *dev_priv = ctx->i915;
+
+ if (GEM_WARN_ON(type > LOAD_TYPE_LAST))
+ return;
+
+ /* Call opt_config to get correct configuration for eu,slice,subslice */
+ ctx->slice_cnt = dev_priv->opt_config[type].slice;
+ ctx->subslice_cnt = dev_priv->opt_config[type].subslice;
+ ctx->eu_cnt = dev_priv->opt_config[type].eu;
+ ctx->pending_load_type = type;
+}
+
/**
* i915_gem_context_create_gvt - create a GVT GEM context
* @dev: drm device *
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index c940168..0a24d28 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -54,6 +54,19 @@ struct intel_context_ops {
void (*destroy)(struct intel_context *ce);
};
+enum gem_load_type {
+ LOAD_TYPE_LOW,
+ LOAD_TYPE_MEDIUM,
+ LOAD_TYPE_HIGH,
+ LOAD_TYPE_LAST
+};
+
+struct i915_sseu_optimum_config {
+ u8 slice;
+ u8 subslice;
+ u8 eu;
+};
+
/*
* Powergating configuration for a particular (context,engine).
*/
@@ -232,6 +245,25 @@ struct i915_gem_context {
* go for low/medium/high load configuration of the GPU.
*/
atomic_t req_cnt;
+
+ /** slice_cnt: used to set the # of slices to be enabled. */
+ u8 slice_cnt;
+
+ /** subslice_cnt: used to set the # of subslices to be enabled. */
+ u8 subslice_cnt;
+
+ /** eu_cnt: used to set the # of eu to be enabled. */
+ u8 eu_cnt;
+
+ /** load_type: The designated load_type (high/medium/low) for a given
+ * number of pending commands in the command queue.
+ */
+ enum gem_load_type load_type;
+
+ /** pending_load_type: The earlier load type that the GPU was configured
+ * for (high/medium/low).
+ */
+ enum gem_load_type pending_load_type;
};
static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
@@ -375,6 +407,8 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+void i915_gem_context_set_load_type(struct i915_gem_context *ctx,
+ enum gem_load_type type);
struct i915_gem_context *
i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 855a507..017a1e2 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -707,6 +707,27 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv)
return 0;
}
+/* static table of slice/subslice/EU for Cherryview */
+static const struct i915_sseu_optimum_config chv_config[LOAD_TYPE_LAST] = {
+ {1, 1, 4}, /* Low */
+ {1, 1, 6}, /* Medium */
+ {1, 2, 6} /* High */
+};
+
+/* static table of slice/subslice/EU for KBL GT2 */
+static const struct i915_sseu_optimum_config kbl_gt2_config[LOAD_TYPE_LAST] = {
+ {1, 3, 2}, /* Low */
+ {1, 3, 4}, /* Medium */
+ {1, 3, 8} /* High */
+};
+
+/* static table of slice/subslice/EU for KBL GT3 */
+static const struct i915_sseu_optimum_config kbl_gt3_config[LOAD_TYPE_LAST] = {
+ {2, 3, 4}, /* Low */
+ {2, 3, 6}, /* Medium */
+ {2, 3, 8} /* High */
+};
+
/**
* intel_device_info_runtime_init - initialize runtime info
* @dev_priv: the i915 device
@@ -728,6 +749,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
struct intel_device_info *info = mkwrite_device_info(dev_priv);
struct intel_runtime_info *runtime = RUNTIME_INFO(dev_priv);
enum pipe pipe;
+ struct i915_sseu_optimum_config *opt_config = NULL;
if (INTEL_GEN(dev_priv) >= 10) {
for_each_pipe(dev_priv, pipe)
@@ -831,12 +853,30 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
/* Initialize slice/subslice/EU info */
if (IS_HASWELL(dev_priv))
haswell_sseu_info_init(dev_priv);
- else if (IS_CHERRYVIEW(dev_priv))
+ else if (IS_CHERRYVIEW(dev_priv)) {
cherryview_sseu_info_init(dev_priv);
+ opt_config = chv_config;
+ BUILD_BUG_ON(ARRAY_SIZE(chv_config) != LOAD_TYPE_LAST);
+ }
else if (IS_BROADWELL(dev_priv))
broadwell_sseu_info_init(dev_priv);
- else if (IS_GEN(dev_priv, 9))
+ else if (IS_GEN(dev_priv, 9)) {
gen9_sseu_info_init(dev_priv);
+
+ switch (info->gt) {
+ default: /* fall through */
+ case 2:
+ opt_config = kbl_gt2_config;
+ BUILD_BUG_ON(ARRAY_SIZE(kbl_gt2_config)
+ != LOAD_TYPE_LAST);
+ break;
+ case 3:
+ opt_config = kbl_gt3_config;
+ BUILD_BUG_ON(ARRAY_SIZE(kbl_gt3_config)
+ != LOAD_TYPE_LAST);
+ break;
+ }
+ }
else if (IS_GEN(dev_priv, 10))
gen10_sseu_info_init(dev_priv);
else if (INTEL_GEN(dev_priv) >= 11)
@@ -847,6 +887,9 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
info->ppgtt = INTEL_PPGTT_NONE;
}
+ if (opt_config)
+ dev_priv->opt_config = opt_config;
+
/* Initialize command stream timestamp frequency */
runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d0af37d..397af1e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1282,6 +1282,35 @@ static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma)
return i915_vma_pin(vma, 0, 0, flags);
}
+static u32
+get_context_rpcs_config(struct i915_gem_context *ctx)
+{
+ u32 rpcs = 0;
+ struct drm_i915_private *dev_priv = ctx->i915;
+
+ if (INTEL_GEN(dev_priv) < 8)
+ return 0;
+
+ if (RUNTIME_INFO(dev_priv)->sseu.has_slice_pg) {
+ rpcs |= GEN8_RPCS_S_CNT_ENABLE;
+ rpcs |= ctx->slice_cnt << GEN8_RPCS_S_CNT_SHIFT;
+ rpcs |= GEN8_RPCS_ENABLE;
+ }
+
+ if (RUNTIME_INFO(dev_priv)->sseu.has_subslice_pg) {
+ rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
+ rpcs |= ctx->subslice_cnt << GEN8_RPCS_SS_CNT_SHIFT;
+ rpcs |= GEN8_RPCS_ENABLE;
+ }
+
+ if (RUNTIME_INFO(dev_priv)->sseu.has_eu_pg) {
+ rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MIN_SHIFT;
+ rpcs |= ctx->eu_cnt << GEN8_RPCS_EU_MAX_SHIFT;
+ rpcs |= GEN8_RPCS_ENABLE;
+ }
+
+ return rpcs;
+}
static void
__execlists_update_reg_state(struct intel_engine_cs *engine,
struct intel_context *ce)
@@ -1294,9 +1323,20 @@ __execlists_update_reg_state(struct intel_engine_cs *engine,
regs[CTX_RING_TAIL + 1] = ring->tail;
/* RPCS */
- if (engine->class == RENDER_CLASS)
+ if (engine->class == RENDER_CLASS &&
+ engine->i915->predictive_load_enable) {
+ u32 rpcs_config = 0;
+ struct i915_gem_context *ctx = ce->gem_context;
+
+ rpcs_config = get_context_rpcs_config(ctx);
+ regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
+ CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+ rpcs_config);
+
+ } else if (engine->class == RENDER_CLASS) {
regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915,
&ce->sseu);
+ }
}
static struct intel_context *
@@ -1340,6 +1380,9 @@ __execlists_context_pin(struct intel_engine_cs *engine,
__execlists_update_reg_state(engine, ce);
+ if (ctx->load_type != ctx->pending_load_type)
+ ctx->load_type = ctx->pending_load_type;
+
ce->state->obj->pin_global++;
i915_gem_context_get(ctx);
return ce;
--
2.7.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-03-14 8:39 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-03-14 8:36 [PATCH v4 0/3] drm/i915: Context aware user agnostic EU/Slice/Sub-slice control within kernel Ankit Navik
2019-03-14 8:36 ` [PATCH v4 1/3] drm/i915: Get active pending request for given context Ankit Navik
2019-03-14 8:55 ` Chris Wilson
2019-03-14 10:39 ` Tvrtko Ursulin
2019-03-14 8:36 ` Ankit Navik [this message]
2019-03-14 10:45 ` [PATCH v4 2/3] drm/i915: set optimum eu/slice/sub-slice configuration based on load type Tvrtko Ursulin
2019-03-14 10:52 ` kbuild test robot
2019-03-14 11:04 ` kbuild test robot
2019-03-14 15:50 ` kbuild test robot
2019-03-14 8:36 ` [PATCH v4 3/3] drm/i915: Predictive governor to control eu/slice/subslice Ankit Navik
2019-03-14 10:58 ` Tvrtko Ursulin
2019-03-14 9:35 ` ✗ Fi.CI.BAT: failure for drm/i915: Context aware user agnostic EU/Slice/Sub-slice control within kernel Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1552552615-6703-3-git-send-email-ankit.p.navik@intel.com \
--to=ankit.p.navik@intel.com \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.