public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Lionel Landwerlin <lionel.g.landwerlin@intel.com>,
	intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH v4 1/6] drm/i915: store all subslice masks
Date: Mon, 15 Jan 2018 17:37:47 +0000	[thread overview]
Message-ID: <61e6c5d8-4c40-1ab5-1105-caadc5a752b7@linux.intel.com> (raw)
In-Reply-To: <20180115144159.25913-2-lionel.g.landwerlin@intel.com>


On 15/01/2018 14:41, Lionel Landwerlin wrote:
> Up to now, subslice mask was assumed to be uniform across slices. But
> starting with Cannonlake, slices can be asymmetric (for example slice0
> has a different number of subslices than slice1+). This change stores all
> subslice masks for all slices rather than having a single mask that
> applies to all slices.
> 
> v2: Rework how we store total numbers in sseu_dev_info (Tvrtko)
>      Fix CHV eu masks, was reading disabled as enabled (Tvrtko)
>      Readability changes (Tvrtko)
>      Add EU index helper (Tvrtko)
> 
> v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko)
>      Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko)
>      Reformat debug prints for subslices (Tvrtko)
> 
> v4: Change eu_mask helper into sseu_set_eus() (Tvrtko)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  25 ++--
>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>   drivers/gpu/drm/i915/intel_device_info.c | 201 +++++++++++++++++++++++--------
>   drivers/gpu/drm/i915/intel_device_info.h |  47 +++++++-
>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>   6 files changed, 216 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index cc659b4b2a45..684551114965 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4289,7 +4289,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask = BIT(0);
> -		sseu->subslice_mask |= BIT(ss);
> +		sseu->subslice_mask[0] |= BIT(ss);
>   		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
> @@ -4336,7 +4336,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask |= BIT(s);
> -		sseu->subslice_mask = info->sseu.subslice_mask;
> +		sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4391,8 +4391,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   		sseu->slice_mask |= BIT(s);
>   
>   		if (IS_GEN9_BC(dev_priv))
> -			sseu->subslice_mask =
> -				INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +			sseu->subslice_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4402,7 +4402,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   					/* skip disabled subslice */
>   					continue;
>   
> -				sseu->subslice_mask |= BIT(ss);
> +				sseu->subslice_mask[s] |= BIT(ss);
>   			}
>   
>   			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
> @@ -4424,9 +4424,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>   	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>   
>   	if (sseu->slice_mask) {
> -		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>   		sseu->eu_per_subslice =
>   				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> +		for (s = 0; s < fls(sseu->slice_mask); s++) {
> +			sseu->subslice_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
> +		}
>   		sseu->eu_total = sseu->eu_per_subslice *
>   				 sseu_subslice_total(sseu);
>   
> @@ -4445,6 +4448,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   {
>   	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>   	const char *type = is_available_info ? "Available" : "Enabled";
> +	int s;
>   
>   	seq_printf(m, "  %s Slice Mask: %04x\n", type,
>   		   sseu->slice_mask);
> @@ -4452,10 +4456,11 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   		   hweight8(sseu->slice_mask));
>   	seq_printf(m, "  %s Subslice Total: %u\n", type,
>   		   sseu_subslice_total(sseu));
> -	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
> -		   sseu->subslice_mask);
> -	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
> -		   hweight8(sseu->subslice_mask));
> +	for (s = 0; s < fls(sseu->slice_mask); s++) {
> +		seq_printf(m, "  %s Slice%i %u subslices, mask=%04x\n", type,
> +			   s, hweight8(sseu->subslice_mask[s]),
> +			   sseu->subslice_mask[s]);
> +	}
>   	seq_printf(m, "  %s EU Total: %u\n", type,
>   		   sseu->eu_total);
>   	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 6c8da9d20c33..969835d3cbcd 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_SUBSLICE_MASK:
> -		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
>   		if (!value)
>   			return -ENODEV;
>   		break;
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index d28592e43512..ed14994527fc 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -80,12 +80,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
>   
>   static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
>   {
> +	int s;
> +
>   	drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
>   	drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
>   	drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
> -	drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
> -	drm_printf(p, "subslice per slice: %u\n",
> -		   hweight8(sseu->subslice_mask));
> +	for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
> +		drm_printf(p, "slice%d %u subslices mask=%04x\n",
> +			   s, hweight8(sseu->subslice_mask[s]),
> +			   sseu->subslice_mask[s]);
> +	}
>   	drm_printf(p, "EU total: %u\n", sseu->eu_total);
>   	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
>   	drm_printf(p, "has slice power gating: %s\n",
> @@ -119,22 +123,87 @@ void intel_device_info_dump(const struct intel_device_info *info,
>   	intel_device_info_dump_flags(info, p);
>   }
>   
> +static u16 compute_eu_total(const struct sseu_dev_info *sseu)
> +{
> +	u16 i, total = 0;
> +
> +	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
> +		total += hweight8(sseu->eu_mask[i]);
> +
> +	return total;
> +}
> +
> +static u16 compute_subslice_total(const struct sseu_dev_info *sseu)
> +{
> +	u16 i, total = 0;
> +
> +	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
> +		total += hweight8(sseu->subslice_mask[i]);
> +
> +	return total;
> +}
> +
>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>   	const u32 fuse2 = I915_READ(GEN8_FUSE2);
> +	int s, ss;
> +	const int eu_mask = 0xff;
> +	u32 subslice_mask, eu_en;
>   
>   	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>   			    GEN10_F2_S_ENA_SHIFT;
> -	sseu->subslice_mask = (1 << 4) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> -				 GEN10_F2_SS_DIS_SHIFT);
> +	sseu->max_slices = 6;
> +	sseu->max_subslices = 4;
> +	sseu->max_eus_per_subslice = 8;
>   
> -	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
> -	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
> -				     GEN10_EU_DIS_SS_MASK));
> +	subslice_mask = (1 << 4) - 1;
> +	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> +			   GEN10_F2_SS_DIS_SHIFT);
> +
> +	/*
> +	 * Slice0 can have up to 3 subslices, but there are only 2 in
> +	 * slice1/2.
> +	 */
> +	sseu->subslice_mask[0] = subslice_mask;
> +	for (s = 1; s < sseu->max_slices; s++)
> +		sseu->subslice_mask[s] = subslice_mask & 0x3;
> +
> +	/* Slice0 */
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
> +	for (ss = 0; ss < sseu->max_subslices; ss++)
> +		sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
> +	/* Slice1 */
> +	sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
> +	sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
> +	/* Slice2 */
> +	sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
> +	sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
> +	/* Slice3 */
> +	sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
> +	sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
> +	/* Slice4 */
> +	sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
> +	sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
> +	/* Slice5 */
> +	sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
> +	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
> +	sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
> +
> +	/* Do a second pass where we mark the subslices disabled if all their
> +	 * eus are off.
> +	 */
> +	for (s = 0; s < sseu->max_slices; s++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
> +			if (sseu_get_eus(sseu, s, ss) == 0)
> +				sseu->subslice_mask[s] &= ~BIT(ss);
> +		}
> +	}
> +
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
>   
>   	/*
>   	 * CNL is expected to always have a uniform distribution
> @@ -155,26 +224,40 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	u32 fuse, eu_dis;
> +	u32 fuse;
>   
>   	fuse = I915_READ(CHV_FUSE_GT);
>   
>   	sseu->slice_mask = BIT(0);
> +	sseu->max_slices = 1;
> +	sseu->max_subslices = 2;
> +	sseu->max_eus_per_subslice = 8;
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
> -		sseu->subslice_mask |= BIT(0);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS0_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		u8 disabled_mask =
> +			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
> +			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
> +			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
> +			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
> +
> +		sseu->subslice_mask[0] |= BIT(0);
> +		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
>   	}
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
> -		sseu->subslice_mask |= BIT(1);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS1_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		u8 disabled_mask =
> +			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
> +			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
> +			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
> +			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
> +
> +		sseu->subslice_mask[0] |= BIT(1);
> +		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
>   	}
>   
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * CHV expected to always have a uniform distribution of EU
>   	 * across subslices.
> @@ -196,41 +279,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_device_info *info = mkwrite_device_info(dev_priv);
>   	struct sseu_dev_info *sseu = &info->sseu;
> -	int s_max = 3, ss_max = 4, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable;
> -	u8 eu_mask = 0xff;
> +	u32 fuse2, eu_disable, subslice_mask;
> +	const u8 eu_mask = 0xff;
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
>   
> +	/* BXT has a single slice and at most 3 subslices. */
> +	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
> +	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	*/
> -	sseu->subslice_mask = (1 << ss_max) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> -				 GEN9_F2_SS_DIS_SHIFT);
> +	subslice_mask = (1 << sseu->max_subslices) - 1;
> +	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> +			   GEN9_F2_SS_DIS_SHIFT);
>   
>   	/*
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	*/
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> +		sseu->subslice_mask[s] = subslice_mask;
> +
>   		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
> -		for (ss = 0; ss < ss_max; ss++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			int eu_per_ss;
> +			u8 eu_disabled_mask;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslice_mask[s] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
> -						      eu_mask);
> +			eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask;
> +
> +			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
> +
> +			eu_per_ss = sseu->max_eus_per_subslice -
> +				hweight8(eu_disabled_mask);
>   
>   			/*
>   			 * Record which subslice(s) has(have) 7 EUs. we
> @@ -239,11 +333,12 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   			 */
>   			if (eu_per_ss == 7)
>   				sseu->subslice_7eu[s] |= BIT(ss);
> -
> -			sseu->eu_total += eu_per_ss;
>   		}
>   	}
>   
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * SKL is expected to always have a uniform distribution
>   	 * of EU across subslices with the exception that any one
> @@ -269,8 +364,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>   
>   	if (IS_GEN9_LP(dev_priv)) {
> -#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
> -		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
> +#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
> +		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
>   
>   		sseu->min_eu_in_pool = 0;
>   		if (info->has_pooled_eu) {
> @@ -288,19 +383,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	const int s_max = 3, ss_max = 3, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable[3]; /* s_max */
> +	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
> +	sseu->max_slices = 3;
> +	sseu->max_subslices = 3;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	 */
> -	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
> -	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> -				 GEN8_F2_SS_DIS_SHIFT);
> +	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
> +	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> +			   GEN8_F2_SS_DIS_SHIFT);
>   
>   	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
>   	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
> @@ -314,30 +412,39 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	 */
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> -		for (ss = 0; ss < ss_max; ss++) {
> +		sseu->subslice_mask[s] = subslice_mask;
> +
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
> +			u8 eu_disabled_mask;
>   			u32 n_disabled;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslice_mask[ss] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
> +			eu_disabled_mask =
> +				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
> +
> +			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
> +
> +			n_disabled = hweight8(eu_disabled_mask);
>   
>   			/*
>   			 * Record which subslices have 7 EUs.
>   			 */
> -			if (eu_max - n_disabled == 7)
> +			if (sseu->max_eus_per_subslice - n_disabled == 7)
>   				sseu->subslice_7eu[s] |= 1 << ss;
> -
> -			sseu->eu_total += eu_max - n_disabled;
>   		}
>   	}
>   
> +	sseu->subslice_total = compute_subslice_total(sseu);
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * BDW is expected to always have a uniform distribution of EU across
>   	 * subslices with the exception that any one EU in any one subslice may
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 49cb27bd04c1..36e0df87862d 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -110,10 +110,14 @@ enum intel_platform {
>   	func(supports_tv); \
>   	func(has_ipc);
>   
> +#define GEN_MAX_SLICES		(6) /* CNL upper bound */
> +#define GEN_MAX_SUBSLICES	(7)
> +
>   struct sseu_dev_info {
>   	u8 slice_mask;
> -	u8 subslice_mask;
> -	u8 eu_total;
> +	u8 subslice_mask[GEN_MAX_SUBSLICES];
> +	u16 subslice_total;
> +	u16 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
>   	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
> @@ -121,6 +125,17 @@ struct sseu_dev_info {
>   	u8 has_slice_pg:1;
>   	u8 has_subslice_pg:1;
>   	u8 has_eu_pg:1;
> +
> +	/* Topology fields */
> +	u8 max_slices;
> +	u8 max_subslices;
> +	u8 max_eus_per_subslice;
> +
> +	/* We don't have more than 8 eus per subslice at the moment and as we
> +	 * store eus enabled using bits, no need to multiply by eus per
> +	 * subslice.
> +	 */
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>   };
>   
>   struct intel_device_info {
> @@ -167,7 +182,33 @@ struct intel_device_info {
>   
>   static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
>   {
> -	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
> +	return sseu->subslice_total;
> +}
> +
> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
> +			      int slice, int subslice)
> +{
> +	int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
> +					   BITS_PER_BYTE);
> +	int slice_stride = sseu->max_subslices * subslice_stride;
> +
> +	return slice * slice_stride + subslice * subslice_stride;
> +}
> +
> +/*
> + * The following functions prototypes should be updated with a larger type
> + * than u8 if we ever have more than 8 EUs per subslice.
> + */
> +static inline u8 sseu_get_eus(const struct sseu_dev_info *sseu,
> +			      int slice, int subslice)
> +{
> +	return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)];
> +}
> +
> +static inline void sseu_set_eus(struct sseu_dev_info *sseu,
> +				int slice, int subslice, u8 eu_mask)
> +{
> +	sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice)] = eu_mask;
>   }
>   
>   const char *intel_platform_name(enum intel_platform platform);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ff25f209d0a5..ac7896031b8d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>   
>   	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>   		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
> -		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
> +		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
>   			GEN8_RPCS_SS_CNT_SHIFT;
>   		rpcs |= GEN8_RPCS_ENABLE;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c5ff203e42d6..23ae9a957fab 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
>   
>   #define instdone_subslice_mask(dev_priv__) \
>   	(INTEL_GEN(dev_priv__) == 7 ? \
> -	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
> +	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
>   
>   #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
>   	for ((slice__) = 0, (subslice__) = 0; \
> 

I am happy with this version and I did not spot any mistakes.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Do you know if we have any test coverage which would be able to tell us 
if the reported numbers before and after match?

Regards,

Tvrtko

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2018-01-15 17:37 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-15 14:41 [PATCH v4 0/6] drm/i915: expose RCS topology to userspace Lionel Landwerlin
2018-01-15 14:41 ` [PATCH v4 1/6] drm/i915: store all subslice masks Lionel Landwerlin
2018-01-15 17:37   ` Tvrtko Ursulin [this message]
2018-01-15 18:09     ` Lionel Landwerlin
2018-01-15 14:41 ` [PATCH v4 2/6] drm/i915/debugfs: reuse max slice/subslices already stored in sseu Lionel Landwerlin
2018-01-15 14:41 ` [PATCH v4 3/6] drm/i915/debugfs: add rcs topology entry Lionel Landwerlin
2018-01-15 17:42   ` Tvrtko Ursulin
2018-01-15 18:12     ` Lionel Landwerlin
2018-01-15 14:41 ` [PATCH v4 4/6] drm/i915: add rcs topology to error state Lionel Landwerlin
2018-01-15 17:43   ` Tvrtko Ursulin
2018-01-16 13:33     ` Lionel Landwerlin
2018-01-15 14:41 ` [PATCH v4 5/6] drm/i915: add query uAPI Lionel Landwerlin
2018-01-15 14:41 ` [PATCH v4 6/6] drm/i915: expose rcs topology through " Lionel Landwerlin
2018-01-15 17:54   ` Tvrtko Ursulin
2018-01-15 18:23     ` Lionel Landwerlin
2018-01-16  8:42       ` Tvrtko Ursulin
2018-01-16 10:42         ` Lionel Landwerlin
2018-01-15 15:31 ` ✓ Fi.CI.BAT: success for drm/i915: expose RCS topology to userspace Patchwork
2018-01-15 19:18 ` ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=61e6c5d8-4c40-1ab5-1105-caadc5a752b7@linux.intel.com \
    --to=tvrtko.ursulin@linux.intel.com \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=lionel.g.landwerlin@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox