From: Stuart Summers <stuart.summers@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 6/6] drm/i915: Expand subslice mask
Date: Tue, 30 Apr 2019 16:06:06 -0700 [thread overview]
Message-ID: <20190430230606.8421-7-stuart.summers@intel.com> (raw)
In-Reply-To: <20190430230606.8421-1-stuart.summers@intel.com>
Currently, the subslice_mask runtime parameter is stored as an
array of subslices per slice. Expand the subslice mask array to
better match what is presented to userspace through the
I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
then calculated:
slice * subslice stride + subslice index / 8
v2: fix spacing in set_sseu_info args
use set_sseu_info to initialize sseu data when building
device status in debugfs
rename variables in intel_engine_types.h to avoid checkpatch
warnings
v3: update headers in intel_sseu.h
v4: add const to some sseu_dev_info variables
use sseu->eu_stride for EU stride calculations
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +-
drivers/gpu/drm/i915/gt/intel_engine_types.h | 32 +++--
drivers/gpu/drm/i915/gt/intel_hangcheck.c | 3 +-
drivers/gpu/drm/i915/gt/intel_sseu.c | 49 +++++--
drivers/gpu/drm/i915/gt/intel_sseu.h | 16 ++-
drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +-
drivers/gpu/drm/i915/i915_debugfs.c | 44 +++---
drivers/gpu/drm/i915/i915_drv.c | 6 +-
drivers/gpu/drm/i915/i915_gpu_error.c | 5 +-
drivers/gpu/drm/i915/i915_query.c | 10 +-
drivers/gpu/drm/i915/intel_device_info.c | 142 +++++++++++--------
11 files changed, 198 insertions(+), 117 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index f7308479d511..e438d366874f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -908,7 +908,7 @@ u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
u32 mcr_s_ss_select;
u32 slice = fls(sseu->slice_mask);
- u32 subslice = fls(sseu->subslice_mask[slice]);
+ u32 subslice = fls(sseu->subslice_mask[slice * sseu->ss_stride]);
if (IS_GEN(dev_priv, 10))
mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
@@ -984,6 +984,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone)
{
struct drm_i915_private *dev_priv = engine->i915;
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
struct intel_uncore *uncore = engine->uncore;
u32 mmio_base = engine->mmio_base;
int slice;
@@ -1001,7 +1002,8 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
instdone->slice_common =
intel_uncore_read(uncore, GEN7_SC_INSTDONE);
- for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+ for_each_instdone_slice_subslice(dev_priv, sseu, slice,
+ subslice) {
instdone->sampler[slice][subslice] =
read_subslice_reg(dev_priv, slice, subslice,
GEN7_SAMPLER_INSTDONE);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index d972c339309c..fa70528963a4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -534,20 +534,22 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
}
-#define instdone_slice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
-
-#define instdone_subslice_mask(dev_priv__) \
- (IS_GEN(dev_priv__, 7) ? \
- 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
-
-#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
- for ((slice__) = 0, (subslice__) = 0; \
- (slice__) < I915_MAX_SLICES; \
- (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
- (slice__) += ((subslice__) == 0)) \
- for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
- (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
+#define instdone_has_slice(dev_priv___, sseu___, slice___) \
+ ((IS_GEN(dev_priv___, 7) ? \
+ 1 : (sseu___)->slice_mask) & \
+ BIT(slice___)) \
+
+#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
+ ((IS_GEN(dev_priv__, 7) ? \
+ 1 : (sseu__)->subslice_mask[slice__ * (sseu__)->ss_stride + \
+ subslice__ / BITS_PER_BYTE]) & \
+ BIT(subslice__ % BITS_PER_BYTE)) \
+
+#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
+ for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
+ (subslice_) = ((subslice_) + 1) < I915_MAX_SUBSLICES ? (subslice_) + 1 : 0, \
+ (slice_) += ((subslice_) == 0)) \
+ for_each_if(instdone_has_slice(dev_priv_, sseu_, slice) && \
+ instdone_has_subslice(dev_priv_, sseu_, slice_, subslice_)) \
#endif /* __INTEL_ENGINE_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
index e5eaa06fe74d..53c1c98161e1 100644
--- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
@@ -50,6 +50,7 @@ static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
static bool subunits_stuck(struct intel_engine_cs *engine)
{
struct drm_i915_private *dev_priv = engine->i915;
+ const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
struct intel_instdone instdone;
struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
bool stuck;
@@ -71,7 +72,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine)
stuck &= instdone_unchanged(instdone.slice_common,
&accu_instdone->slice_common);
- for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
+ for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
&accu_instdone->sampler[slice][subslice]);
stuck &= instdone_unchanged(instdone.row[slice][subslice],
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 4a0b82fc108c..49316b7ef074 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -8,6 +8,17 @@
#include "intel_lrc_reg.h"
#include "intel_sseu.h"
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice)
+{
+ sseu->max_slices = max_slices;
+ sseu->max_subslices = max_subslices;
+ sseu->max_eus_per_subslice = max_eus_per_subslice;
+
+ sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
+ sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
+}
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
@@ -22,17 +33,39 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
- return hweight8(sseu->subslice_mask[slice]);
+ unsigned int i, total = 0;
+
+ for (i = 0; i < sseu->ss_stride; i++)
+ total += hweight8(sseu->subslice_mask[slice * sseu->ss_stride +
+ i]);
+
+ return total;
+}
+
+void intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice,
+ u8 *to_mask, const u8 *from_mask)
+{
+ int offset = slice * sseu->ss_stride;
+
+ memcpy(&to_mask[offset], &from_mask[offset], sseu->ss_stride);
+}
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask)
+{
+ int i, offset = slice * sseu->ss_stride;
+
+ for (i = 0; i < sseu->ss_stride; i++)
+ sseu->subslice_mask[offset + i] =
+ (ss_mask >> (BITS_PER_BYTE * i)) & 0xff;
}
static int intel_sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
int subslice)
{
- int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
- BITS_PER_BYTE);
- int slice_stride = sseu->max_subslices * subslice_stride;
+ int slice_stride = sseu->max_subslices * sseu->eu_stride;
- return slice * slice_stride + subslice * subslice_stride;
+ return slice * slice_stride + subslice * sseu->eu_stride;
}
u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
@@ -41,8 +74,7 @@ u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
int i, offset = intel_sseu_eu_idx(sseu, slice, subslice);
u16 eu_mask = 0;
- for (i = 0;
- i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
+ for (i = 0; i < sseu->eu_stride; i++) {
eu_mask |= ((u16)sseu->eu_mask[offset + i]) <<
(i * BITS_PER_BYTE);
}
@@ -55,8 +87,7 @@ void intel_sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
{
int i, offset = intel_sseu_eu_idx(sseu, slice, subslice);
- for (i = 0;
- i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
+ for (i = 0; i < sseu->eu_stride; i++) {
sseu->eu_mask[offset + i] =
(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 56e3721ae83f..bf01f338a8cc 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -9,16 +9,18 @@
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/string.h>
struct drm_i915_private;
#define GEN_MAX_SLICES (6) /* CNL upper bound */
#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
#define GEN_SSEU_STRIDE(bits) DIV_ROUND_UP(bits, BITS_PER_BYTE)
+#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
struct sseu_dev_info {
u8 slice_mask;
- u8 subslice_mask[GEN_MAX_SLICES];
+ u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -33,6 +35,9 @@ struct sseu_dev_info {
u8 max_subslices;
u8 max_eus_per_subslice;
+ u8 ss_stride;
+ u8 eu_stride;
+
/* We don't have more than 8 eus per subslice at the moment and as we
* store eus enabled using bits, no need to multiply by eus per
* subslice.
@@ -63,12 +68,21 @@ intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
return value;
}
+void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
+ u8 max_subslices, u8 max_eus_per_subslice);
+
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+void intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice,
+ u8 *to_mask, const u8 *from_mask);
+
+void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
+ u32 ss_mask);
+
u16 intel_sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
int subslice);
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 5751446a4b0b..51df88873ff5 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -771,7 +771,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
u32 slice = fls(sseu->slice_mask);
u32 fuse3 =
intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
- u8 ss_mask = sseu->subslice_mask[slice];
+ u8 ss_mask = sseu->subslice_mask[slice * sseu->ss_stride];
u8 enabled_mask = (ss_mask | ss_mask >>
GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 20ac782c50c8..a45299cd6989 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1256,6 +1256,7 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv,
struct seq_file *m,
struct intel_instdone *instdone)
{
+ struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
int slice;
int subslice;
@@ -1271,11 +1272,11 @@ static void i915_instdone_info(struct drm_i915_private *dev_priv,
if (INTEL_GEN(dev_priv) <= 6)
return;
- for_each_instdone_slice_subslice(dev_priv, slice, subslice)
+ for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
seq_printf(m, "\t\tSAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
slice, subslice, instdone->sampler[slice][subslice]);
- for_each_instdone_slice_subslice(dev_priv, slice, subslice)
+ for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice)
seq_printf(m, "\t\tROW_INSTDONE[%d][%d]: 0x%08x\n",
slice, subslice, instdone->row[slice][subslice]);
}
@@ -4065,7 +4066,9 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
continue;
sseu->slice_mask |= BIT(s);
- sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
+ intel_sseu_copy_subslices(&info->sseu, s,
+ sseu->subslice_mask,
+ info->sseu.subslice_mask);
for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt;
@@ -4116,18 +4119,22 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
sseu->slice_mask |= BIT(s);
if (IS_GEN9_BC(dev_priv))
- sseu->subslice_mask[s] =
- RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s];
+ intel_sseu_copy_subslices(&info->sseu, s,
+ sseu->subslice_mask,
+ info->sseu.subslice_mask);
for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt;
+ u8 ss_idx = s * info->sseu.ss_stride +
+ ss / BITS_PER_BYTE;
if (IS_GEN9_LP(dev_priv)) {
if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
/* skip disabled subslice */
continue;
- sseu->subslice_mask[s] |= BIT(ss);
+ sseu->subslice_mask[ss_idx] |=
+ BIT(ss % BITS_PER_BYTE);
}
eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -4144,25 +4151,24 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
struct sseu_dev_info *sseu)
{
+ struct intel_runtime_info *info = RUNTIME_INFO(dev_priv);
u32 slice_info = I915_READ(GEN8_GT_SLICE_INFO);
int s;
sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
if (sseu->slice_mask) {
- sseu->eu_per_subslice =
- RUNTIME_INFO(dev_priv)->sseu.eu_per_subslice;
- for (s = 0; s < fls(sseu->slice_mask); s++) {
- sseu->subslice_mask[s] =
- RUNTIME_INFO(dev_priv)->sseu.subslice_mask[s];
- }
+ sseu->eu_per_subslice = info->sseu.eu_per_subslice;
+ for (s = 0; s < fls(sseu->slice_mask); s++)
+ intel_sseu_copy_subslices(&info->sseu, s,
+ sseu->subslice_mask,
+ info->sseu.subslice_mask);
sseu->eu_total = sseu->eu_per_subslice *
intel_sseu_subslice_total(sseu);
/* subtract fused off EU(s) from enabled slice(s) */
for (s = 0; s < fls(sseu->slice_mask); s++) {
- u8 subslice_7eu =
- RUNTIME_INFO(dev_priv)->sseu.subslice_7eu[s];
+ u8 subslice_7eu = info->sseu.subslice_7eu[s];
sseu->eu_total -= hweight8(subslice_7eu);
}
@@ -4209,6 +4215,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
static int i915_sseu_status(struct seq_file *m, void *unused)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
+ const struct intel_runtime_info *info = RUNTIME_INFO(dev_priv);
struct sseu_dev_info sseu;
intel_wakeref_t wakeref;
@@ -4216,14 +4223,13 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
return -ENODEV;
seq_puts(m, "SSEU Device Info\n");
- i915_print_sseu_info(m, true, &RUNTIME_INFO(dev_priv)->sseu);
+ i915_print_sseu_info(m, true, &info->sseu);
seq_puts(m, "SSEU Device Status\n");
memset(&sseu, 0, sizeof(sseu));
- sseu.max_slices = RUNTIME_INFO(dev_priv)->sseu.max_slices;
- sseu.max_subslices = RUNTIME_INFO(dev_priv)->sseu.max_subslices;
- sseu.max_eus_per_subslice =
- RUNTIME_INFO(dev_priv)->sseu.max_eus_per_subslice;
+ intel_sseu_set_info(&sseu, info->sseu.max_slices,
+ info->sseu.max_subslices,
+ info->sseu.max_eus_per_subslice);
with_intel_runtime_pm(dev_priv, wakeref) {
if (IS_CHERRYVIEW(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index c7ab0f724021..820b9216cb13 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -323,7 +323,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
struct pci_dev *pdev = dev_priv->drm.pdev;
struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
drm_i915_getparam_t *param = data;
- int value;
+ int value = 0;
switch (param->param) {
case I915_PARAM_IRQ_ACTIVE:
@@ -452,7 +452,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
return -ENODEV;
break;
case I915_PARAM_SUBSLICE_MASK:
- value = sseu->subslice_mask[0];
+ /* Only copy bits from the first subslice */
+ memcpy(&value, sseu->subslice_mask,
+ min(sseu->ss_stride, (u8)sizeof(value)));
if (!value)
return -ENODEV;
break;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f51ff683dd2e..9da4118ad43a 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -405,6 +405,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
static void error_print_instdone(struct drm_i915_error_state_buf *m,
const struct drm_i915_error_engine *ee)
{
+ struct sseu_dev_info *sseu = &RUNTIME_INFO(m->i915)->sseu;
int slice;
int subslice;
@@ -420,12 +421,12 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
if (INTEL_GEN(m->i915) <= 6)
return;
- for_each_instdone_slice_subslice(m->i915, slice, subslice)
+ for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice)
err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
slice, subslice,
ee->instdone.sampler[slice][subslice]);
- for_each_instdone_slice_subslice(m->i915, slice, subslice)
+ for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice)
err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n",
slice, subslice,
ee->instdone.row[slice][subslice]);
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 7c1708c22811..000dcb145ce0 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -37,8 +37,6 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
struct drm_i915_query_topology_info topo;
u32 slice_length, subslice_length, eu_length, total_length;
- u8 subslice_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
- u8 eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
int ret;
if (query_item->flags != 0)
@@ -50,8 +48,8 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
slice_length = sizeof(sseu->slice_mask);
- subslice_length = sseu->max_slices * subslice_stride;
- eu_length = sseu->max_slices * sseu->max_subslices * eu_stride;
+ subslice_length = sseu->max_slices * sseu->ss_stride;
+ eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
total_length = sizeof(topo) + slice_length + subslice_length +
eu_length;
@@ -69,9 +67,9 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
topo.subslice_offset = slice_length;
- topo.subslice_stride = subslice_stride;
+ topo.subslice_stride = sseu->ss_stride;
topo.eu_offset = slice_length + subslice_length;
- topo.eu_stride = eu_stride;
+ topo.eu_stride = sseu->eu_stride;
if (__copy_to_user(u64_to_user_ptr(query_item->data_ptr),
&topo, sizeof(topo)))
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index e1dbccf04cd9..bbbc0a8c2183 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -84,17 +84,42 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
#undef PRINT_FLAG
}
+#define SS_STR_MAX_SIZE (GEN_MAX_SUBSLICE_STRIDE * 2)
+
+static u8 *
+subslice_per_slice_str(u8 *buf, const struct sseu_dev_info *sseu, u8 slice)
+{
+ int i;
+ u8 ss_offset = slice * sseu->ss_stride;
+
+ GEM_BUG_ON(slice >= sseu->max_slices);
+
+ memset(buf, 0, SS_STR_MAX_SIZE);
+
+ /*
+ * Print subslice information in reverse order to match
+ * userspace expectations.
+ */
+ for (i = 0; i < sseu->ss_stride; i++)
+ sprintf(&buf[i * 2], "%02x",
+ sseu->subslice_mask[ss_offset + sseu->ss_stride -
+ (i + 1)]);
+
+ return buf;
+}
+
static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
int s;
+ u8 buf[SS_STR_MAX_SIZE];
drm_printf(p, "slice total: %u, mask=%04x\n",
hweight8(sseu->slice_mask), sseu->slice_mask);
drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
for (s = 0; s < sseu->max_slices; s++) {
- drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
+ drm_printf(p, "slice%d: %u subslices, mask=%s\n",
s, intel_sseu_subslices_per_slice(sseu, s),
- sseu->subslice_mask[s]);
+ subslice_per_slice_str(buf, sseu, s));
}
drm_printf(p, "EU total: %u\n", sseu->eu_total);
drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
@@ -118,6 +143,7 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
struct drm_printer *p)
{
int s, ss;
+ u8 buf[SS_STR_MAX_SIZE];
if (sseu->max_slices == 0) {
drm_printf(p, "Unavailable\n");
@@ -125,9 +151,9 @@ void intel_device_info_dump_topology(const struct sseu_dev_info *sseu,
}
for (s = 0; s < sseu->max_slices; s++) {
- drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
+ drm_printf(p, "slice%d: %u subslice(s) (0x%s):\n",
s, intel_sseu_subslices_per_slice(sseu, s),
- sseu->subslice_mask[s]);
+ subslice_per_slice_str(buf, sseu, s));
for (ss = 0; ss < sseu->max_subslices; ss++) {
u16 enabled_eus = intel_sseu_get_eus(sseu, s, ss);
@@ -156,15 +182,10 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
u8 eu_en;
int s;
- if (IS_ELKHARTLAKE(dev_priv)) {
- sseu->max_slices = 1;
- sseu->max_subslices = 4;
- sseu->max_eus_per_subslice = 8;
- } else {
- sseu->max_slices = 1;
- sseu->max_subslices = 8;
- sseu->max_eus_per_subslice = 8;
- }
+ if (IS_ELKHARTLAKE(dev_priv))
+ intel_sseu_set_info(sseu, 1, 4, 8);
+ else
+ intel_sseu_set_info(sseu, 1, 8, 8);
s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
@@ -177,9 +198,11 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
int ss;
sseu->slice_mask |= BIT(s);
- sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask;
+ sseu->subslice_mask[s * sseu->ss_stride] =
+ (ss_en >> ss_idx) & ss_en_mask;
for (ss = 0; ss < sseu->max_subslices; ss++) {
- if (sseu->subslice_mask[s] & BIT(ss))
+ if (sseu->subslice_mask[s * sseu->ss_stride] &
+ BIT(ss))
intel_sseu_set_eus(sseu, s, ss, eu_en);
}
}
@@ -201,23 +224,10 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
const int eu_mask = 0xff;
u32 subslice_mask, eu_en;
+ intel_sseu_set_info(sseu, 6, 4, 8);
+
sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
GEN10_F2_S_ENA_SHIFT;
- sseu->max_slices = 6;
- sseu->max_subslices = 4;
- sseu->max_eus_per_subslice = 8;
-
- subslice_mask = (1 << 4) - 1;
- subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
- GEN10_F2_SS_DIS_SHIFT);
-
- /*
- * Slice0 can have up to 3 subslices, but there are only 2 in
- * slice1/2.
- */
- sseu->subslice_mask[0] = subslice_mask;
- for (s = 1; s < sseu->max_slices; s++)
- sseu->subslice_mask[s] = subslice_mask & 0x3;
/* Slice0 */
eu_en = ~I915_READ(GEN8_EU_DISABLE0);
@@ -242,14 +252,22 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
eu_en = ~I915_READ(GEN10_EU_DISABLE3);
intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
- /* Do a second pass where we mark the subslices disabled if all their
- * eus are off.
- */
+ subslice_mask = (1 << 4) - 1;
+ subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+ GEN10_F2_SS_DIS_SHIFT);
+
for (s = 0; s < sseu->max_slices; s++) {
for (ss = 0; ss < sseu->max_subslices; ss++) {
if (intel_sseu_get_eus(sseu, s, ss) == 0)
- sseu->subslice_mask[s] &= ~BIT(ss);
+ subslice_mask &= ~BIT(ss);
}
+
+ /*
+ * Slice0 can have up to 3 subslices, but there are only 2 in
+ * slice1/2.
+ */
+ intel_sseu_set_subslices(sseu, s, s == 0 ? subslice_mask :
+ subslice_mask & 0x3);
}
sseu->eu_total = compute_eu_total(sseu);
@@ -275,13 +293,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
{
struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
u32 fuse;
+ u8 subslice_mask;
fuse = I915_READ(CHV_FUSE_GT);
sseu->slice_mask = BIT(0);
- sseu->max_slices = 1;
- sseu->max_subslices = 2;
- sseu->max_eus_per_subslice = 8;
+ intel_sseu_set_info(sseu, 1, 2, 8);
if (!(fuse & CHV_FGT_DISABLE_SS0)) {
u8 disabled_mask =
@@ -290,7 +307,7 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
- sseu->subslice_mask[0] |= BIT(0);
+ subslice_mask |= BIT(0);
intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask);
}
@@ -301,10 +318,12 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
- sseu->subslice_mask[0] |= BIT(1);
+ subslice_mask |= BIT(1);
intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask);
}
+ intel_sseu_set_subslices(sseu, 0, subslice_mask);
+
sseu->eu_total = compute_eu_total(sseu);
/*
@@ -312,7 +331,8 @@ static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
* across subslices.
*/
sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
- sseu->eu_total / intel_sseu_subslice_total(sseu) :
+ sseu->eu_total /
+ intel_sseu_subslice_total(sseu) :
0;
/*
* CHV supports subslice power gating on devices with more than
@@ -336,9 +356,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
/* BXT has a single slice and at most 3 subslices. */
- sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
- sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
- sseu->max_eus_per_subslice = 8;
+ intel_sseu_set_info(sseu, IS_GEN9_LP(dev_priv) ? 1 : 3,
+ IS_GEN9_LP(dev_priv) ? 3 : 4, 8);
/*
* The subslice disable field is global, i.e. it applies
@@ -357,14 +376,16 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
/* skip disabled slice */
continue;
- sseu->subslice_mask[s] = subslice_mask;
+ intel_sseu_set_subslices(sseu, s, subslice_mask);
eu_disable = I915_READ(GEN9_EU_DISABLE(s));
for (ss = 0; ss < sseu->max_subslices; ss++) {
int eu_per_ss;
u8 eu_disabled_mask;
+ u8 ss_idx = s * sseu->ss_stride + ss / BITS_PER_BYTE;
- if (!(sseu->subslice_mask[s] & BIT(ss)))
+ if (!(sseu->subslice_mask[ss_idx] &
+ BIT(ss % BITS_PER_BYTE)))
/* skip disabled subslice */
continue;
@@ -437,9 +458,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
fuse2 = I915_READ(GEN8_FUSE2);
sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
- sseu->max_slices = 3;
- sseu->max_subslices = 3;
- sseu->max_eus_per_subslice = 8;
+ intel_sseu_set_info(sseu, 3, 3, 8);
/*
* The subslice disable field is global, i.e. it applies
@@ -466,18 +485,21 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
/* skip disabled slice */
continue;
- sseu->subslice_mask[s] = subslice_mask;
+ intel_sseu_set_subslices(sseu, s, subslice_mask);
for (ss = 0; ss < sseu->max_subslices; ss++) {
u8 eu_disabled_mask;
+ u8 ss_idx = s * sseu->ss_stride + ss / BITS_PER_BYTE;
u32 n_disabled;
- if (!(sseu->subslice_mask[s] & BIT(ss)))
+ if (!(sseu->subslice_mask[ss_idx] &
+ BIT(ss % BITS_PER_BYTE)))
/* skip disabled subslice */
continue;
eu_disabled_mask =
- eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
+ eu_disable[s] >>
+ (ss * sseu->max_eus_per_subslice);
intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
@@ -517,6 +539,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
u32 fuse1;
int s, ss;
+ u32 subslice_mask;
/*
* There isn't a register to tell us how many slices/subslices. We
@@ -528,22 +551,18 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
/* fall through */
case 1:
sseu->slice_mask = BIT(0);
- sseu->subslice_mask[0] = BIT(0);
+ subslice_mask = BIT(0);
break;
case 2:
sseu->slice_mask = BIT(0);
- sseu->subslice_mask[0] = BIT(0) | BIT(1);
+ subslice_mask = BIT(0) | BIT(1);
break;
case 3:
sseu->slice_mask = BIT(0) | BIT(1);
- sseu->subslice_mask[0] = BIT(0) | BIT(1);
- sseu->subslice_mask[1] = BIT(0) | BIT(1);
+ subslice_mask = BIT(0) | BIT(1);
break;
}
- sseu->max_slices = hweight8(sseu->slice_mask);
- sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
-
fuse1 = I915_READ(HSW_PAVP_FUSE1);
switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
default:
@@ -560,9 +579,14 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
sseu->eu_per_subslice = 6;
break;
}
- sseu->max_eus_per_subslice = sseu->eu_per_subslice;
+
+ intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
+ hweight8(subslice_mask),
+ sseu->eu_per_subslice);
for (s = 0; s < sseu->max_slices; s++) {
+ intel_sseu_set_subslices(sseu, s, subslice_mask);
+
for (ss = 0; ss < sseu->max_subslices; ss++) {
intel_sseu_set_eus(sseu, s, ss,
(1UL << sseu->eu_per_subslice) - 1);
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-04-30 23:06 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-30 23:06 [PATCH 0/6] Refactor to expand subslice mask Stuart Summers
2019-04-30 23:06 ` [PATCH 1/6] drm/i915: Use local variable for SSEU info in GETPARAM ioctl Stuart Summers
2019-05-01 0:44 ` Stuart Summers
2019-04-30 23:06 ` [PATCH 2/6] drm/i915: Add macro for SSEU stride calculation Stuart Summers
2019-04-30 23:06 ` [PATCH 3/6] drm/i915: Move calculation of subslices per slice to new function Stuart Summers
2019-04-30 23:06 ` [PATCH 4/6] drm/i915: Move sseu helper functions to intel_sseu.h Stuart Summers
2019-04-30 23:06 ` [PATCH 5/6] drm/i915: Remove inline from sseu helper functions Stuart Summers
2019-04-30 23:06 ` Stuart Summers [this message]
2019-04-30 23:13 ` ✗ Fi.CI.CHECKPATCH: warning for Refactor to expand subslice mask (rev5) Patchwork
2019-04-30 23:16 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-05-01 0:00 ` ✓ Fi.CI.BAT: success " Patchwork
2019-05-01 0:58 ` ✗ Fi.CI.BAT: failure for Refactor to expand subslice mask (rev6) Patchwork
2019-05-01 15:36 ` Summers, Stuart
-- strict thread matches above, loose matches on Subject: below --
2019-05-01 15:34 [PATCH 0/6] Refactor to expand subslice mask Stuart Summers
2019-05-01 15:34 ` [PATCH 6/6] drm/i915: Expand " Stuart Summers
2019-05-01 18:22 ` Tvrtko Ursulin
2019-05-01 18:29 ` Tvrtko Ursulin
2019-05-01 19:40 ` Summers, Stuart
2019-05-01 22:04 ` Daniele Ceraolo Spurio
2019-05-02 14:47 ` Summers, Stuart
2019-05-03 9:05 ` Lionel Landwerlin
2019-05-03 14:28 ` Summers, Stuart
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190430230606.8421-7-stuart.summers@intel.com \
--to=stuart.summers@intel.com \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.