From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Praveen Paneri <praveen.paneri@intel.com>,
intel-gfx@lists.freedesktop.org
Cc: Zhe Wang <zhe1.wang@intel.com>, rodrigo.vivi@intel.com
Subject: Re: [PATCH v3] drm/i915/bxt: Broxton decoupled MMIO
Date: Wed, 5 Oct 2016 14:50:04 +0100 [thread overview]
Message-ID: <9ae05af7-ee73-c233-75c4-163a91318982@linux.intel.com> (raw)
In-Reply-To: <1475595966-14754-1-git-send-email-praveen.paneri@intel.com>
On 04/10/2016 16:46, Praveen Paneri wrote:
> Decoupled MMIO is an alternative way to access forcewake domain
> registers, which requires less cycles for a single read/write and
> avoids frequent software forcewake.
> This certainly gives advantage over the forcewake as this new
> mechanism “decouples” CPU cycles and allow them to complete even
> when GT is in a CPD (frequency change) or C6 state.
>
> This can co-exist with forcewake and we will continue to use forcewake
> as appropriate. E.g. 64-bit register writes to avoid writing 2 dwords
> separately and land into funny situations.
>
> v2:
> - Moved platform check out of the function and got rid of duplicate
> functions to find out decoupled power domain (Chris)
> - Added a check for forcewake already held and skipped decoupled
> access (Chris)
> - Skipped writing 64 bit registers through decoupled MMIO (Chris)
>
> v3:
> - Improved commit message with more info on decoupled mmio (Tvrtko)
> - Changed decoupled operation to enum and used u32 instead of
> uint_32 data type for register offset (Tvrtko)
> - Moved HAS_DECOUPLED_MMIO to device info (Tvrtko)
> - Added lookup table for converting fw_engine to pd_engine (Tvrtko)
> - Improved __gen9_decoupled_read and __gen9_decoupled_write routines (Tvrtko)
>
> Signed-off-by: Zhe Wang <zhe1.wang@intel.com>
> Signed-off-by: Praveen Paneri <praveen.paneri@intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 18 +++++-
> drivers/gpu/drm/i915/i915_pci.c | 1 +
> drivers/gpu/drm/i915/i915_reg.h | 7 +++
> drivers/gpu/drm/i915/intel_uncore.c | 113 ++++++++++++++++++++++++++++++++++++
> 4 files changed, 138 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index f8c66ee..bfdd55a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -559,6 +559,18 @@ enum forcewake_domains {
> #define FW_REG_READ (1)
> #define FW_REG_WRITE (2)
>
> +enum decoupled_power_domains {
> + GEN9_DECOUPLED_PD_BLITTER = 0,
> + GEN9_DECOUPLED_PD_RENDER,
> + GEN9_DECOUPLED_PD_MEDIA,
> + GEN9_DECOUPLED_PD_ALL
> +};
> +
> +enum decoupled_ops {
> + GEN9_DECOUPLED_OP_WRITE = 0,
> + GEN9_DECOUPLED_OP_READ
> +};
> +
> enum forcewake_domains
> intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
> i915_reg_t reg, unsigned int op);
> @@ -690,7 +702,8 @@ struct intel_csr {
> func(has_snoop) sep \
> func(has_ddi) sep \
> func(has_fpga_dbg) sep \
> - func(has_pooled_eu)
> + func(has_pooled_eu) sep \
> + func(has_decoupled_mmio)
>
> #define DEFINE_FLAG(name) u8 name:1
> #define SEP_SEMICOLON ;
> @@ -2869,6 +2882,9 @@ struct drm_i915_cmd_table {
> #define GT_FREQUENCY_MULTIPLIER 50
> #define GEN9_FREQ_SCALER 3
>
> +#define HAS_DECOUPLED_MMIO(dev) (INTEL_INFO(dev)->has_decoupled_mmio \
> + && IS_BXT_REVID(dev, BXT_REVID_C0, REVID_FOREVER))
> +
> #include "i915_trace.h"
>
> static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 31e6edd..5c56c0c 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -360,6 +360,7 @@ static const struct intel_device_info intel_broxton_info = {
> .has_hw_contexts = 1,
> .has_logical_ring_contexts = 1,
> .has_guc = 1,
> + .has_decoupled_mmio = 1,
> .ddb_size = 512,
> GEN_DEFAULT_PIPEOFFSETS,
> IVB_CURSOR_OFFSETS,
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 8d44cee..bf7b4c9 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7398,6 +7398,13 @@ enum {
> #define SKL_FUSE_PG1_DIST_STATUS (1<<26)
> #define SKL_FUSE_PG2_DIST_STATUS (1<<25)
>
> +/* Decoupled MMIO register pair for kernel driver */
> +#define GEN9_DECOUPLED_REG0_DW0 _MMIO(0xF00)
> +#define GEN9_DECOUPLED_REG0_DW1 _MMIO(0xF04)
> +#define GEN9_DECOUPLED_DW1_GO (1<<31)
> +#define GEN9_DECOUPLED_PD_SHIFT 28
> +#define GEN9_DECOUPLED_OP_SHIFT 24
> +
> /* Per-pipe DDI Function Control */
> #define _TRANS_DDI_FUNC_CTL_A 0x60400
> #define _TRANS_DDI_FUNC_CTL_B 0x61400
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index e2b188d..0af602e 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -831,6 +831,72 @@ unclaimed_reg_debug(struct drm_i915_private *dev_priv,
> __unclaimed_reg_debug(dev_priv, reg, read, before);
> }
>
> +static const enum decoupled_power_domains fw2dpd_engine[] = {
> + GEN9_DECOUPLED_PD_RENDER,
> + GEN9_DECOUPLED_PD_BLITTER,
> + GEN9_DECOUPLED_PD_ALL,
> + GEN9_DECOUPLED_PD_MEDIA,
> + GEN9_DECOUPLED_PD_ALL,
> + GEN9_DECOUPLED_PD_ALL,
> + GEN9_DECOUPLED_PD_ALL
> +};
> +
> +/*
> + * Decoupled MMIO access for only 1 DWORD
> + */
> +static void __gen9_decoupled_mmio_access(struct drm_i915_private *dev_priv,
> + u32 reg,
> + enum forcewake_domains fw_engine,
> + enum decoupled_ops operation)
> +{
> + enum decoupled_power_domains dpd_engine;
> + u32 ctrl_reg_data = 0;
> +
> + dpd_engine = fw2dpd_engine[fw_engine - 1];
> +
> + ctrl_reg_data |= reg;
> + ctrl_reg_data |= (operation << GEN9_DECOUPLED_OP_SHIFT);
> + ctrl_reg_data |= (dpd_engine << GEN9_DECOUPLED_PD_SHIFT);
> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> + ctrl_reg_data |= GEN9_DECOUPLED_DW1_GO;
> + __raw_i915_write32(dev_priv, GEN9_DECOUPLED_REG0_DW1, ctrl_reg_data);
> +
> + if (wait_for_atomic((__raw_i915_read32(dev_priv,
> + GEN9_DECOUPLED_REG0_DW1) & GEN9_DECOUPLED_DW1_GO) == 0,
> + FORCEWAKE_ACK_TIMEOUT_MS))
> +
I asked about the timeout before. Is the forcewake ack timeout
applicable to decoupled MMIO, or should a better value be used?
Also, do you have any numbers on how fast decoupled access typically is?
In other words, how does it compare with existing code for accesses not
done under an explicit forcewake get? Is a sequence of I915_READs for
example faster with decoupled mmio than under the current scheme of
automatic forcewake grab/release?
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2016-10-05 13:50 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-06 5:24 [PATCH] drm/i915/bxt: Broxton decoupled MMIO Praveen Paneri
2016-09-06 5:51 ` ✗ Fi.CI.BAT: warning for " Patchwork
2016-09-06 6:36 ` [PATCH] " Chris Wilson
2016-09-19 17:05 ` Praveen Paneri
2016-09-19 17:15 ` [PATCH v2] " Praveen Paneri
2016-09-23 9:49 ` Tvrtko Ursulin
2016-09-26 11:08 ` Paneri, Praveen
2016-09-26 20:23 ` Tvrtko Ursulin
2016-10-04 15:46 ` [PATCH v3] " Praveen Paneri
2016-10-04 17:43 ` Vivi, Rodrigo
2016-10-04 19:56 ` Chris Wilson
2016-10-05 3:17 ` Praveen Paneri
2016-10-05 6:24 ` Praveen Paneri
2016-11-15 6:40 ` [PATCH v4] " Praveen Paneri
2016-11-15 9:36 ` Tvrtko Ursulin
2016-11-15 10:07 ` Chris Wilson
2016-11-15 13:17 ` Praveen Paneri
2016-11-15 14:44 ` Tvrtko Ursulin
2016-11-15 17:19 ` [PATCH v5] " Praveen Paneri
2016-11-16 8:25 ` Tvrtko Ursulin
2016-11-16 9:03 ` Praveen Paneri
2016-11-16 9:08 ` Tvrtko Ursulin
2016-11-16 9:18 ` Chris Wilson
2016-11-15 10:56 ` [PATCH v4] " Praveen Paneri
2016-11-15 10:59 ` Tvrtko Ursulin
2016-10-05 13:50 ` Tvrtko Ursulin [this message]
2016-10-10 17:03 ` [PATCH v2] " Carlos Santa
2016-09-19 17:55 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev2) Patchwork
2016-10-04 16:19 ` ✗ Fi.CI.BAT: warning for drm/i915/bxt: Broxton decoupled MMIO (rev3) Patchwork
2016-11-15 7:16 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev4) Patchwork
2016-11-15 18:15 ` ✓ Fi.CI.BAT: success for drm/i915/bxt: Broxton decoupled MMIO (rev5) Patchwork
2016-11-16 9:38 ` Tvrtko Ursulin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9ae05af7-ee73-c233-75c4-163a91318982@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=intel-gfx@lists.freedesktop.org \
--cc=praveen.paneri@intel.com \
--cc=rodrigo.vivi@intel.com \
--cc=zhe1.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).