public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH i-g-t 1/4] headers: bump
  2018-11-13 14:36 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-11-13 14:36 ` Tvrtko Ursulin
  0 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2018-11-13 14:36 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

---
 include/drm-uapi/amdgpu_drm.h  |  52 +++-
 include/drm-uapi/drm.h         |  17 ++
 include/drm-uapi/drm_fourcc.h  | 224 +++++++++++++++
 include/drm-uapi/drm_mode.h    |  26 +-
 include/drm-uapi/etnaviv_drm.h |   6 +
 include/drm-uapi/exynos_drm.h  | 240 ++++++++++++++++
 include/drm-uapi/i915_drm.h    |  80 ++++++
 include/drm-uapi/msm_drm.h     |   2 +
 include/drm-uapi/tegra_drm.h   | 492 ++++++++++++++++++++++++++++++++-
 include/drm-uapi/v3d_drm.h     | 204 ++++++++++++++
 include/drm-uapi/vc4_drm.h     |  13 +-
 include/drm-uapi/virtgpu_drm.h |   1 +
 include/drm-uapi/vmwgfx_drm.h  | 166 ++++++++---
 13 files changed, 1467 insertions(+), 56 deletions(-)
 create mode 100644 include/drm-uapi/v3d_drm.h

diff --git a/include/drm-uapi/amdgpu_drm.h b/include/drm-uapi/amdgpu_drm.h
index 1816bd8200d1..370e9a5536ef 100644
--- a/include/drm-uapi/amdgpu_drm.h
+++ b/include/drm-uapi/amdgpu_drm.h
@@ -72,12 +72,41 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_FENCE_TO_HANDLE, union drm_amdgpu_fence_to_handle)
 #define DRM_IOCTL_AMDGPU_SCHED		DRM_IOW(DRM_COMMAND_BASE + DRM_AMDGPU_SCHED, union drm_amdgpu_sched)
 
+/**
+ * DOC: memory domains
+ *
+ * %AMDGPU_GEM_DOMAIN_CPU	System memory that is not GPU accessible.
+ * Memory in this pool could be swapped out to disk if there is pressure.
+ *
+ * %AMDGPU_GEM_DOMAIN_GTT	GPU accessible system memory, mapped into the
+ * GPU's virtual address space via gart. Gart memory linearizes non-contiguous
+ * pages of system memory, allows GPU access system memory in a linezrized
+ * fashion.
+ *
+ * %AMDGPU_GEM_DOMAIN_VRAM	Local video memory. For APUs, it is memory
+ * carved out by the BIOS.
+ *
+ * %AMDGPU_GEM_DOMAIN_GDS	Global on-chip data storage used to share data
+ * across shader threads.
+ *
+ * %AMDGPU_GEM_DOMAIN_GWS	Global wave sync, used to synchronize the
+ * execution of all the waves on a device.
+ *
+ * %AMDGPU_GEM_DOMAIN_OA	Ordered append, used by 3D or Compute engines
+ * for appending data.
+ */
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
 #define AMDGPU_GEM_DOMAIN_VRAM		0x4
 #define AMDGPU_GEM_DOMAIN_GDS		0x8
 #define AMDGPU_GEM_DOMAIN_GWS		0x10
 #define AMDGPU_GEM_DOMAIN_OA		0x20
+#define AMDGPU_GEM_DOMAIN_MASK		(AMDGPU_GEM_DOMAIN_CPU | \
+					 AMDGPU_GEM_DOMAIN_GTT | \
+					 AMDGPU_GEM_DOMAIN_VRAM | \
+					 AMDGPU_GEM_DOMAIN_GDS | \
+					 AMDGPU_GEM_DOMAIN_GWS | \
+					 AMDGPU_GEM_DOMAIN_OA)
 
 /* Flag that CPU access will be required for the case of VRAM domain */
 #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED	(1 << 0)
@@ -95,6 +124,10 @@ extern "C" {
 #define AMDGPU_GEM_CREATE_VM_ALWAYS_VALID	(1 << 6)
 /* Flag that BO sharing will be explicitly synchronized */
 #define AMDGPU_GEM_CREATE_EXPLICIT_SYNC		(1 << 7)
+/* Flag that indicates allocating MQD gart on GFX9, where the mtype
+ * for the second page onward should be set to NC.
+ */
+#define AMDGPU_GEM_CREATE_MQD_GFX9		(1 << 8)
 
 struct drm_amdgpu_gem_create_in  {
 	/** the requested memory size */
@@ -473,7 +506,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_HW_IP_UVD_ENC      5
 #define AMDGPU_HW_IP_VCN_DEC      6
 #define AMDGPU_HW_IP_VCN_ENC      7
-#define AMDGPU_HW_IP_NUM          8
+#define AMDGPU_HW_IP_VCN_JPEG     8
+#define AMDGPU_HW_IP_NUM          9
 
 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1
 
@@ -482,6 +516,7 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
 #define AMDGPU_CHUNK_ID_SYNCOBJ_IN      0x04
 #define AMDGPU_CHUNK_ID_SYNCOBJ_OUT     0x05
+#define AMDGPU_CHUNK_ID_BO_HANDLES      0x06
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
@@ -520,6 +555,10 @@ union drm_amdgpu_cs {
 /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
 #define AMDGPU_IB_FLAG_PREEMPT (1<<2)
 
+/* The IB fence should do the L2 writeback but not invalidate any shader
+ * caches (L2/vL1/sL1/I$). */
+#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
+
 struct drm_amdgpu_cs_chunk_ib {
 	__u32 _pad;
 	/** AMDGPU_IB_FLAG_* */
@@ -618,6 +657,16 @@ struct drm_amdgpu_cs_chunk_data {
 	#define AMDGPU_INFO_FW_SOS		0x0c
 	/* Subquery id: Query PSP ASD firmware version */
 	#define AMDGPU_INFO_FW_ASD		0x0d
+	/* Subquery id: Query VCN firmware version */
+	#define AMDGPU_INFO_FW_VCN		0x0e
+	/* Subquery id: Query GFX RLC SRLC firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_CNTL 0x0f
+	/* Subquery id: Query GFX RLC SRLG firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10
+	/* Subquery id: Query GFX RLC SRLS firmware version */
+	#define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11
+	/* Subquery id: Query DMCU firmware version */
+	#define AMDGPU_INFO_FW_DMCU		0x12
 /* number of bytes moved for TTM migration */
 #define AMDGPU_INFO_NUM_BYTES_MOVED		0x0f
 /* the used VRAM size */
@@ -806,6 +855,7 @@ struct drm_amdgpu_info_firmware {
 #define AMDGPU_VRAM_TYPE_GDDR5 5
 #define AMDGPU_VRAM_TYPE_HBM   6
 #define AMDGPU_VRAM_TYPE_DDR3  7
+#define AMDGPU_VRAM_TYPE_DDR4  8
 
 struct drm_amdgpu_info_device {
 	/** PCI Device ID */
diff --git a/include/drm-uapi/drm.h b/include/drm-uapi/drm.h
index f0bd91de0cf9..02b4169555a5 100644
--- a/include/drm-uapi/drm.h
+++ b/include/drm-uapi/drm.h
@@ -674,6 +674,22 @@ struct drm_get_cap {
  */
 #define DRM_CLIENT_CAP_ATOMIC	3
 
+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO    4
+
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene setup in the commit. Depends on client
+ * also supporting DRM_CLIENT_CAP_ATOMIC
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
+
 /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
@@ -695,6 +711,7 @@ struct drm_prime_handle {
 struct drm_syncobj_create {
 	__u32 handle;
 #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
+#define DRM_SYNCOBJ_CREATE_TYPE_TIMELINE (1 << 1)
 	__u32 flags;
 };
 
diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h
index e04613d30a13..0cd40ebfa1b1 100644
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -30,11 +30,50 @@
 extern "C" {
 #endif
 
+/**
+ * DOC: overview
+ *
+ * In the DRM subsystem, framebuffer pixel formats are described using the
+ * fourcc codes defined in `include/uapi/drm/drm_fourcc.h`. In addition to the
+ * fourcc code, a Format Modifier may optionally be provided, in order to
+ * further describe the buffer's format - for example tiling or compression.
+ *
+ * Format Modifiers
+ * ----------------
+ *
+ * Format modifiers are used in conjunction with a fourcc code, forming a
+ * unique fourcc:modifier pair. This format:modifier pair must fully define the
+ * format and data layout of the buffer, and should be the only way to describe
+ * that particular buffer.
+ *
+ * Having multiple fourcc:modifier pairs which describe the same layout should
+ * be avoided, as such aliases run the risk of different drivers exposing
+ * different names for the same data format, forcing userspace to understand
+ * that they are aliases.
+ *
+ * Format modifiers may change any property of the buffer, including the number
+ * of planes and/or the required allocation size. Format modifiers are
+ * vendor-namespaced, and as such the relationship between a fourcc code and a
+ * modifier is specific to the modifer being used. For example, some modifiers
+ * may preserve meaning - such as number of planes - from the fourcc code,
+ * whereas others may not.
+ *
+ * Vendors should document their modifier usage in as much detail as
+ * possible, to ensure maximum compatibility across devices, drivers and
+ * applications.
+ *
+ * The authoritative list of format modifier codes is found in
+ * `include/uapi/drm/drm_fourcc.h`
+ */
+
 #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \
 				 ((__u32)(c) << 16) | ((__u32)(d) << 24))
 
 #define DRM_FORMAT_BIG_ENDIAN (1<<31) /* format is big endian instead of little endian */
 
+/* Reserve 0 for the invalid format specifier */
+#define DRM_FORMAT_INVALID	0
+
 /* color index */
 #define DRM_FORMAT_C8		fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
 
@@ -183,6 +222,7 @@ extern "C" {
 #define DRM_FORMAT_MOD_VENDOR_QCOM    0x05
 #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06
 #define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
+#define DRM_FORMAT_MOD_VENDOR_ARM     0x08
 /* add more to the end as needed */
 
 #define DRM_FORMAT_RESERVED	      ((1ULL << 56) - 1)
@@ -298,6 +338,28 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE	fourcc_mod_code(SAMSUNG, 1)
 
+/*
+ * Tiled, 16 (pixels) x 16 (lines) - sized macroblocks
+ *
+ * This is a simple tiled layout using tiles of 16x16 pixels in a row-major
+ * layout. For YCbCr formats Cb/Cr components are taken in such a way that
+ * they correspond to their 16x16 luma block.
+ */
+#define DRM_FORMAT_MOD_SAMSUNG_16_16_TILE	fourcc_mod_code(SAMSUNG, 2)
+
+/*
+ * Qualcomm Compressed Format
+ *
+ * Refers to a compressed variant of the base format that is compressed.
+ * Implementation may be platform and base-format specific.
+ *
+ * Each macrotile consists of m x n (mostly 4 x 4) tiles.
+ * Pixel data pitch/stride is aligned with macrotile width.
+ * Pixel data height is aligned with macrotile height.
+ * Entire pixel data buffer is aligned with 4k(bytes).
+ */
+#define DRM_FORMAT_MOD_QCOM_COMPRESSED	fourcc_mod_code(QCOM, 1)
+
 /* Vivante framebuffer modifiers */
 
 /*
@@ -384,6 +446,23 @@ extern "C" {
 #define DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK_THIRTYTWO_GOB \
 	fourcc_mod_code(NVIDIA, 0x15)
 
+/*
+ * Some Broadcom modifiers take parameters, for example the number of
+ * vertical lines in the image. Reserve the lower 32 bits for modifier
+ * type, and the next 24 bits for parameters. Top 8 bits are the
+ * vendor code.
+ */
+#define __fourcc_mod_broadcom_param_shift 8
+#define __fourcc_mod_broadcom_param_bits 48
+#define fourcc_mod_broadcom_code(val, params) \
+	fourcc_mod_code(BROADCOM, ((((__u64)params) << __fourcc_mod_broadcom_param_shift) | val))
+#define fourcc_mod_broadcom_param(m) \
+	((int)(((m) >> __fourcc_mod_broadcom_param_shift) &	\
+	       ((1ULL << __fourcc_mod_broadcom_param_bits) - 1)))
+#define fourcc_mod_broadcom_mod(m) \
+	((m) & ~(((1ULL << __fourcc_mod_broadcom_param_bits) - 1) <<	\
+		 __fourcc_mod_broadcom_param_shift))
+
 /*
  * Broadcom VC4 "T" format
  *
@@ -405,6 +484,151 @@ extern "C" {
  */
 #define DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED fourcc_mod_code(BROADCOM, 1)
 
+/*
+ * Broadcom SAND format
+ *
+ * This is the native format that the H.264 codec block uses.  For VC4
+ * HVS, it is only valid for H.264 (NV12/21) and RGBA modes.
+ *
+ * The image can be considered to be split into columns, and the
+ * columns are placed consecutively into memory.  The width of those
+ * columns can be either 32, 64, 128, or 256 pixels, but in practice
+ * only 128 pixel columns are used.
+ *
+ * The pitch between the start of each column is set to optimally
+ * switch between SDRAM banks. This is passed as the number of lines
+ * of column width in the modifier (we can't use the stride value due
+ * to various core checks that look at it , so you should set the
+ * stride to width*cpp).
+ *
+ * Note that the column height for this format modifier is the same
+ * for all of the planes, assuming that each column contains both Y
+ * and UV.  Some SAND-using hardware stores UV in a separate tiled
+ * image from Y to reduce the column height, which is not supported
+ * with these modifiers.
+ */
+
+#define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \
+	fourcc_mod_broadcom_code(2, v)
+#define DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(v) \
+	fourcc_mod_broadcom_code(3, v)
+#define DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(v) \
+	fourcc_mod_broadcom_code(4, v)
+#define DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(v) \
+	fourcc_mod_broadcom_code(5, v)
+
+#define DRM_FORMAT_MOD_BROADCOM_SAND32 \
+	DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(0)
+#define DRM_FORMAT_MOD_BROADCOM_SAND64 \
+	DRM_FORMAT_MOD_BROADCOM_SAND64_COL_HEIGHT(0)
+#define DRM_FORMAT_MOD_BROADCOM_SAND128 \
+	DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(0)
+#define DRM_FORMAT_MOD_BROADCOM_SAND256 \
+	DRM_FORMAT_MOD_BROADCOM_SAND256_COL_HEIGHT(0)
+
+/* Broadcom UIF format
+ *
+ * This is the common format for the current Broadcom multimedia
+ * blocks, including V3D 3.x and newer, newer video codecs, and
+ * displays.
+ *
+ * The image consists of utiles (64b blocks), UIF blocks (2x2 utiles),
+ * and macroblocks (4x4 UIF blocks).  Those 4x4 UIF block groups are
+ * stored in columns, with padding between the columns to ensure that
+ * moving from one column to the next doesn't hit the same SDRAM page
+ * bank.
+ *
+ * To calculate the padding, it is assumed that each hardware block
+ * and the software driving it knows the platform's SDRAM page size,
+ * number of banks, and XOR address, and that it's identical between
+ * all blocks using the format.  This tiling modifier will use XOR as
+ * necessary to reduce the padding.  If a hardware block can't do XOR,
+ * the assumption is that a no-XOR tiling modifier will be created.
+ */
+#define DRM_FORMAT_MOD_BROADCOM_UIF fourcc_mod_code(BROADCOM, 6)
+
+/*
+ * Arm Framebuffer Compression (AFBC) modifiers
+ *
+ * AFBC is a proprietary lossless image compression protocol and format.
+ * It provides fine-grained random access and minimizes the amount of data
+ * transferred between IP blocks.
+ *
+ * AFBC has several features which may be supported and/or used, which are
+ * represented using bits in the modifier. Not all combinations are valid,
+ * and different devices or use-cases may support different combinations.
+ */
+#define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode)	fourcc_mod_code(ARM, __afbc_mode)
+
+/*
+ * AFBC superblock size
+ *
+ * Indicates the superblock size(s) used for the AFBC buffer. The buffer
+ * size (in pixels) must be aligned to a multiple of the superblock size.
+ * Four lowest significant bits(LSBs) are reserved for block size.
+ */
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_MASK      0xf
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_16x16     (1ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8      (2ULL)
+
+/*
+ * AFBC lossless colorspace transform
+ *
+ * Indicates that the buffer makes use of the AFBC lossless colorspace
+ * transform.
+ */
+#define AFBC_FORMAT_MOD_YTR     (1ULL <<  4)
+
+/*
+ * AFBC block-split
+ *
+ * Indicates that the payload of each superblock is split. The second
+ * half of the payload is positioned at a predefined offset from the start
+ * of the superblock payload.
+ */
+#define AFBC_FORMAT_MOD_SPLIT   (1ULL <<  5)
+
+/*
+ * AFBC sparse layout
+ *
+ * This flag indicates that the payload of each superblock must be stored at a
+ * predefined position relative to the other superblocks in the same AFBC
+ * buffer. This order is the same order used by the header buffer. In this mode
+ * each superblock is given the same amount of space as an uncompressed
+ * superblock of the particular format would require, rounding up to the next
+ * multiple of 128 bytes in size.
+ */
+#define AFBC_FORMAT_MOD_SPARSE  (1ULL <<  6)
+
+/*
+ * AFBC copy-block restrict
+ *
+ * Buffers with this flag must obey the copy-block restriction. The restriction
+ * is such that there are no copy-blocks referring across the border of 8x8
+ * blocks. For the subsampled data the 8x8 limitation is also subsampled.
+ */
+#define AFBC_FORMAT_MOD_CBR     (1ULL <<  7)
+
+/*
+ * AFBC tiled layout
+ *
+ * The tiled layout groups superblocks in 8x8 or 4x4 tiles, where all
+ * superblocks inside a tile are stored together in memory. 8x8 tiles are used
+ * for pixel formats up to and including 32 bpp while 4x4 tiles are used for
+ * larger bpp formats. The order between the tiles is scan line.
+ * When the tiled layout is used, the buffer size (in pixels) must be aligned
+ * to the tile size.
+ */
+#define AFBC_FORMAT_MOD_TILED   (1ULL <<  8)
+
+/*
+ * AFBC solid color blocks
+ *
+ * Indicates that the buffer makes use of solid-color blocks, whereby bandwidth
+ * can be reduced if a whole superblock is a single color.
+ */
+#define AFBC_FORMAT_MOD_SC      (1ULL <<  9)
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/drm_mode.h b/include/drm-uapi/drm_mode.h
index 2c575794fb52..d3e0fe31efc5 100644
--- a/include/drm-uapi/drm_mode.h
+++ b/include/drm-uapi/drm_mode.h
@@ -93,6 +93,15 @@ extern "C" {
 #define DRM_MODE_PICTURE_ASPECT_NONE		0
 #define DRM_MODE_PICTURE_ASPECT_4_3		1
 #define DRM_MODE_PICTURE_ASPECT_16_9		2
+#define DRM_MODE_PICTURE_ASPECT_64_27		3
+#define DRM_MODE_PICTURE_ASPECT_256_135		4
+
+/* Content type options */
+#define DRM_MODE_CONTENT_TYPE_NO_DATA		0
+#define DRM_MODE_CONTENT_TYPE_GRAPHICS		1
+#define DRM_MODE_CONTENT_TYPE_PHOTO		2
+#define DRM_MODE_CONTENT_TYPE_CINEMA		3
+#define DRM_MODE_CONTENT_TYPE_GAME		4
 
 /* Aspect ratio flag bitmask (4 bits 22:19) */
 #define DRM_MODE_FLAG_PIC_AR_MASK		(0x0F<<19)
@@ -102,6 +111,10 @@ extern "C" {
 			(DRM_MODE_PICTURE_ASPECT_4_3<<19)
 #define  DRM_MODE_FLAG_PIC_AR_16_9 \
 			(DRM_MODE_PICTURE_ASPECT_16_9<<19)
+#define  DRM_MODE_FLAG_PIC_AR_64_27 \
+			(DRM_MODE_PICTURE_ASPECT_64_27<<19)
+#define  DRM_MODE_FLAG_PIC_AR_256_135 \
+			(DRM_MODE_PICTURE_ASPECT_256_135<<19)
 
 #define  DRM_MODE_FLAG_ALL	(DRM_MODE_FLAG_PHSYNC |		\
 				 DRM_MODE_FLAG_NHSYNC |		\
@@ -173,8 +186,9 @@ extern "C" {
 /*
  * DRM_MODE_REFLECT_<axis>
  *
- * Signals that the contents of a drm plane is reflected in the <axis> axis,
+ * Signals that the contents of a drm plane is reflected along the <axis> axis,
  * in the same way as mirroring.
+ * See kerneldoc chapter "Plane Composition Properties" for more details.
  *
  * This define is provided as a convenience, looking up the property id
  * using the name->prop id lookup is the preferred method.
@@ -338,6 +352,7 @@ enum drm_mode_subconnector {
 #define DRM_MODE_CONNECTOR_VIRTUAL      15
 #define DRM_MODE_CONNECTOR_DSI		16
 #define DRM_MODE_CONNECTOR_DPI		17
+#define DRM_MODE_CONNECTOR_WRITEBACK	18
 
 struct drm_mode_get_connector {
 
@@ -363,7 +378,7 @@ struct drm_mode_get_connector {
 	__u32 pad;
 };
 
-#define DRM_MODE_PROP_PENDING	(1<<0)
+#define DRM_MODE_PROP_PENDING	(1<<0) /* deprecated, do not use */
 #define DRM_MODE_PROP_RANGE	(1<<1)
 #define DRM_MODE_PROP_IMMUTABLE	(1<<2)
 #define DRM_MODE_PROP_ENUM	(1<<3) /* enumerated type with text strings */
@@ -598,8 +613,11 @@ struct drm_mode_crtc_lut {
 };
 
 struct drm_color_ctm {
-	/* Conversion matrix in S31.32 format. */
-	__s64 matrix[9];
+	/*
+	 * Conversion matrix in S31.32 sign-magnitude
+	 * (not two's complement!) format.
+	 */
+	__u64 matrix[9];
 };
 
 struct drm_color_lut {
diff --git a/include/drm-uapi/etnaviv_drm.h b/include/drm-uapi/etnaviv_drm.h
index e9b997a0ef27..0d5c49dc478c 100644
--- a/include/drm-uapi/etnaviv_drm.h
+++ b/include/drm-uapi/etnaviv_drm.h
@@ -55,6 +55,12 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_FEATURES_4                0x07
 #define ETNAVIV_PARAM_GPU_FEATURES_5                0x08
 #define ETNAVIV_PARAM_GPU_FEATURES_6                0x09
+#define ETNAVIV_PARAM_GPU_FEATURES_7                0x0a
+#define ETNAVIV_PARAM_GPU_FEATURES_8                0x0b
+#define ETNAVIV_PARAM_GPU_FEATURES_9                0x0c
+#define ETNAVIV_PARAM_GPU_FEATURES_10               0x0d
+#define ETNAVIV_PARAM_GPU_FEATURES_11               0x0e
+#define ETNAVIV_PARAM_GPU_FEATURES_12               0x0f
 
 #define ETNAVIV_PARAM_GPU_STREAM_COUNT              0x10
 #define ETNAVIV_PARAM_GPU_REGISTER_MAX              0x11
diff --git a/include/drm-uapi/exynos_drm.h b/include/drm-uapi/exynos_drm.h
index a00116b5cc5c..7414cfd76419 100644
--- a/include/drm-uapi/exynos_drm.h
+++ b/include/drm-uapi/exynos_drm.h
@@ -135,6 +135,219 @@ struct drm_exynos_g2d_exec {
 	__u64					async;
 };
 
+/* Exynos DRM IPP v2 API */
+
+/**
+ * Enumerate available IPP hardware modules.
+ *
+ * @count_ipps: size of ipp_id array / number of ipp modules (set by driver)
+ * @reserved: padding
+ * @ipp_id_ptr: pointer to ipp_id array or NULL
+ */
+struct drm_exynos_ioctl_ipp_get_res {
+	__u32 count_ipps;
+	__u32 reserved;
+	__u64 ipp_id_ptr;
+};
+
+enum drm_exynos_ipp_format_type {
+	DRM_EXYNOS_IPP_FORMAT_SOURCE		= 0x01,
+	DRM_EXYNOS_IPP_FORMAT_DESTINATION	= 0x02,
+};
+
+struct drm_exynos_ipp_format {
+	__u32 fourcc;
+	__u32 type;
+	__u64 modifier;
+};
+
+enum drm_exynos_ipp_capability {
+	DRM_EXYNOS_IPP_CAP_CROP		= 0x01,
+	DRM_EXYNOS_IPP_CAP_ROTATE	= 0x02,
+	DRM_EXYNOS_IPP_CAP_SCALE	= 0x04,
+	DRM_EXYNOS_IPP_CAP_CONVERT	= 0x08,
+};
+
+/**
+ * Get IPP hardware capabilities and supported image formats.
+ *
+ * @ipp_id: id of IPP module to query
+ * @capabilities: bitmask of drm_exynos_ipp_capability (set by driver)
+ * @reserved: padding
+ * @formats_count: size of formats array (in entries) / number of filled
+ *		   formats (set by driver)
+ * @formats_ptr: pointer to formats array or NULL
+ */
+struct drm_exynos_ioctl_ipp_get_caps {
+	__u32 ipp_id;
+	__u32 capabilities;
+	__u32 reserved;
+	__u32 formats_count;
+	__u64 formats_ptr;
+};
+
+enum drm_exynos_ipp_limit_type {
+	/* size (horizontal/vertial) limits, in pixels (min, max, alignment) */
+	DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE		= 0x0001,
+	/* scale ratio (horizonta/vertial), 16.16 fixed point (min, max) */
+	DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE		= 0x0002,
+
+	/* image buffer area */
+	DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER	= 0x0001 << 16,
+	/* src/dst rectangle area */
+	DRM_EXYNOS_IPP_LIMIT_SIZE_AREA		= 0x0002 << 16,
+	/* src/dst rectangle area when rotation enabled */
+	DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED	= 0x0003 << 16,
+
+	DRM_EXYNOS_IPP_LIMIT_TYPE_MASK		= 0x000f,
+	DRM_EXYNOS_IPP_LIMIT_SIZE_MASK		= 0x000f << 16,
+};
+
+struct drm_exynos_ipp_limit_val {
+	__u32 min;
+	__u32 max;
+	__u32 align;
+	__u32 reserved;
+};
+
+/**
+ * IPP module limitation.
+ *
+ * @type: limit type (see drm_exynos_ipp_limit_type enum)
+ * @reserved: padding
+ * @h: horizontal limits
+ * @v: vertical limits
+ */
+struct drm_exynos_ipp_limit {
+	__u32 type;
+	__u32 reserved;
+	struct drm_exynos_ipp_limit_val h;
+	struct drm_exynos_ipp_limit_val v;
+};
+
+/**
+ * Get IPP limits for given image format.
+ *
+ * @ipp_id: id of IPP module to query
+ * @fourcc: image format code (see DRM_FORMAT_* in drm_fourcc.h)
+ * @modifier: image format modifier (see DRM_FORMAT_MOD_* in drm_fourcc.h)
+ * @type: source/destination identifier (drm_exynos_ipp_format_flag enum)
+ * @limits_count: size of limits array (in entries) / number of filled entries
+ *		 (set by driver)
+ * @limits_ptr: pointer to limits array or NULL
+ */
+struct drm_exynos_ioctl_ipp_get_limits {
+	__u32 ipp_id;
+	__u32 fourcc;
+	__u64 modifier;
+	__u32 type;
+	__u32 limits_count;
+	__u64 limits_ptr;
+};
+
+enum drm_exynos_ipp_task_id {
+	/* buffer described by struct drm_exynos_ipp_task_buffer */
+	DRM_EXYNOS_IPP_TASK_BUFFER		= 0x0001,
+	/* rectangle described by struct drm_exynos_ipp_task_rect */
+	DRM_EXYNOS_IPP_TASK_RECTANGLE		= 0x0002,
+	/* transformation described by struct drm_exynos_ipp_task_transform */
+	DRM_EXYNOS_IPP_TASK_TRANSFORM		= 0x0003,
+	/* alpha configuration described by struct drm_exynos_ipp_task_alpha */
+	DRM_EXYNOS_IPP_TASK_ALPHA		= 0x0004,
+
+	/* source image data (for buffer and rectangle chunks) */
+	DRM_EXYNOS_IPP_TASK_TYPE_SOURCE		= 0x0001 << 16,
+	/* destination image data (for buffer and rectangle chunks) */
+	DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION	= 0x0002 << 16,
+};
+
+/**
+ * Memory buffer with image data.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_BUFFER
+ * other parameters are same as for AddFB2 generic DRM ioctl
+ */
+struct drm_exynos_ipp_task_buffer {
+	__u32	id;
+	__u32	fourcc;
+	__u32	width, height;
+	__u32	gem_id[4];
+	__u32	offset[4];
+	__u32	pitch[4];
+	__u64	modifier;
+};
+
+/**
+ * Rectangle for processing.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_RECTANGLE
+ * @reserved: padding
+ * @x,@y: left corner in pixels
+ * @w,@h: width/height in pixels
+ */
+struct drm_exynos_ipp_task_rect {
+	__u32	id;
+	__u32	reserved;
+	__u32	x;
+	__u32	y;
+	__u32	w;
+	__u32	h;
+};
+
+/**
+ * Image tranformation description.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_TRANSFORM
+ * @rotation: DRM_MODE_ROTATE_* and DRM_MODE_REFLECT_* values
+ */
+struct drm_exynos_ipp_task_transform {
+	__u32	id;
+	__u32	rotation;
+};
+
+/**
+ * Image global alpha configuration for formats without alpha values.
+ *
+ * @id: must be DRM_EXYNOS_IPP_TASK_ALPHA
+ * @value: global alpha value (0-255)
+ */
+struct drm_exynos_ipp_task_alpha {
+	__u32	id;
+	__u32	value;
+};
+
+enum drm_exynos_ipp_flag {
+	/* generate DRM event after processing */
+	DRM_EXYNOS_IPP_FLAG_EVENT	= 0x01,
+	/* dry run, only check task parameters */
+	DRM_EXYNOS_IPP_FLAG_TEST_ONLY	= 0x02,
+	/* non-blocking processing */
+	DRM_EXYNOS_IPP_FLAG_NONBLOCK	= 0x04,
+};
+
+#define DRM_EXYNOS_IPP_FLAGS (DRM_EXYNOS_IPP_FLAG_EVENT |\
+		DRM_EXYNOS_IPP_FLAG_TEST_ONLY | DRM_EXYNOS_IPP_FLAG_NONBLOCK)
+
+/**
+ * Perform image processing described by array of drm_exynos_ipp_task_*
+ * structures (parameters array).
+ *
+ * @ipp_id: id of IPP module to run the task
+ * @flags: bitmask of drm_exynos_ipp_flag values
+ * @reserved: padding
+ * @params_size: size of parameters array (in bytes)
+ * @params_ptr: pointer to parameters array or NULL
+ * @user_data: (optional) data for drm event
+ */
+struct drm_exynos_ioctl_ipp_commit {
+	__u32 ipp_id;
+	__u32 flags;
+	__u32 reserved;
+	__u32 params_size;
+	__u64 params_ptr;
+	__u64 user_data;
+};
+
 #define DRM_EXYNOS_GEM_CREATE		0x00
 #define DRM_EXYNOS_GEM_MAP		0x01
 /* Reserved 0x03 ~ 0x05 for exynos specific gem ioctl */
@@ -147,6 +360,11 @@ struct drm_exynos_g2d_exec {
 #define DRM_EXYNOS_G2D_EXEC		0x22
 
 /* Reserved 0x30 ~ 0x33 for obsolete Exynos IPP ioctls */
+/* IPP - Image Post Processing */
+#define DRM_EXYNOS_IPP_GET_RESOURCES	0x40
+#define DRM_EXYNOS_IPP_GET_CAPS		0x41
+#define DRM_EXYNOS_IPP_GET_LIMITS	0x42
+#define DRM_EXYNOS_IPP_COMMIT		0x43
 
 #define DRM_IOCTL_EXYNOS_GEM_CREATE		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_GEM_CREATE, struct drm_exynos_gem_create)
@@ -165,8 +383,20 @@ struct drm_exynos_g2d_exec {
 #define DRM_IOCTL_EXYNOS_G2D_EXEC		DRM_IOWR(DRM_COMMAND_BASE + \
 		DRM_EXYNOS_G2D_EXEC, struct drm_exynos_g2d_exec)
 
+#define DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES	DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_GET_RESOURCES, \
+		struct drm_exynos_ioctl_ipp_get_res)
+#define DRM_IOCTL_EXYNOS_IPP_GET_CAPS		DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_GET_CAPS, struct drm_exynos_ioctl_ipp_get_caps)
+#define DRM_IOCTL_EXYNOS_IPP_GET_LIMITS		DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_GET_LIMITS, \
+		struct drm_exynos_ioctl_ipp_get_limits)
+#define DRM_IOCTL_EXYNOS_IPP_COMMIT		DRM_IOWR(DRM_COMMAND_BASE + \
+		DRM_EXYNOS_IPP_COMMIT, struct drm_exynos_ioctl_ipp_commit)
+
 /* EXYNOS specific events */
 #define DRM_EXYNOS_G2D_EVENT		0x80000000
+#define DRM_EXYNOS_IPP_EVENT		0x80000002
 
 struct drm_exynos_g2d_event {
 	struct drm_event	base;
@@ -177,6 +407,16 @@ struct drm_exynos_g2d_event {
 	__u32			reserved;
 };
 
+struct drm_exynos_ipp_event {
+	struct drm_event	base;
+	__u64			user_data;
+	__u32			tv_sec;
+	__u32			tv_usec;
+	__u32			ipp_id;
+	__u32			sequence;
+	__u64			reserved;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 16e452aa12d4..0dea2f0143a3 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -412,6 +412,14 @@ typedef struct drm_i915_irq_wait {
 	int irq_seq;
 } drm_i915_irq_wait_t;
 
+/*
+ * Different modes of per-process Graphics Translation Table,
+ * see I915_PARAM_HAS_ALIASING_PPGTT
+ */
+#define I915_GEM_PPGTT_NONE	0
+#define I915_GEM_PPGTT_ALIASING	1
+#define I915_GEM_PPGTT_FULL	2
+
 /* Ioctl to query kernel params:
  */
 #define I915_PARAM_IRQ_ACTIVE            1
@@ -529,6 +537,28 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
 
+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU path). However,
+ * on a few different processors and chipsets, this is not necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of the backing
+ * storage (physical memory) via a different path (with different physical tags
+ * to the indirect write via the GGTT) will see stale values from before
+ * the GGTT write. Inside the kernel, we can for the most part keep track of
+ * the different read/write domains in use (e.g. set-domain), but the assumption
+ * of coherency is baked into the ABI, hence reporting its true state in this
+ * parameter.
+ *
+ * Reports true when writes via mmap_gtt are immediately visible following an
+ * lfence to flush the WCB.
+ *
+ * Reports false when writes via mmap_gtt are indeterminately delayed in an in
+ * internal buffer and are _not_ immediately visible to third parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when reporting false is strongly disadvised.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT	52
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -1456,9 +1486,52 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd1;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd2;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
@@ -1510,6 +1583,13 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_OA_EXPONENT,
 
+	/**
+	 * Specify a global OA buffer size to be allocated in bytes. The size
+	 * specified must be supported by HW (currently supported sizes are
+	 * powers of 2 ranging from 128Kb to 16Mb).
+	 */
+	DRM_I915_PERF_PROP_OA_BUFFER_SIZE,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
diff --git a/include/drm-uapi/msm_drm.h b/include/drm-uapi/msm_drm.h
index bbbaffad772d..c06d0a5bdd80 100644
--- a/include/drm-uapi/msm_drm.h
+++ b/include/drm-uapi/msm_drm.h
@@ -201,10 +201,12 @@ struct drm_msm_gem_submit_bo {
 #define MSM_SUBMIT_NO_IMPLICIT   0x80000000 /* disable implicit sync */
 #define MSM_SUBMIT_FENCE_FD_IN   0x40000000 /* enable input fence_fd */
 #define MSM_SUBMIT_FENCE_FD_OUT  0x20000000 /* enable output fence_fd */
+#define MSM_SUBMIT_SUDO          0x10000000 /* run submitted cmds from RB */
 #define MSM_SUBMIT_FLAGS                ( \
 		MSM_SUBMIT_NO_IMPLICIT   | \
 		MSM_SUBMIT_FENCE_FD_IN   | \
 		MSM_SUBMIT_FENCE_FD_OUT  | \
+		MSM_SUBMIT_SUDO          | \
 		0)
 
 /* Each cmdstream submit consists of a table of buffers involved, and
diff --git a/include/drm-uapi/tegra_drm.h b/include/drm-uapi/tegra_drm.h
index 12f9bf848db1..6c07919c04e9 100644
--- a/include/drm-uapi/tegra_drm.h
+++ b/include/drm-uapi/tegra_drm.h
@@ -32,143 +32,615 @@ extern "C" {
 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
 
+/**
+ * struct drm_tegra_gem_create - parameters for the GEM object creation IOCTL
+ */
 struct drm_tegra_gem_create {
+	/**
+	 * @size:
+	 *
+	 * The size, in bytes, of the buffer object to be created.
+	 */
 	__u64 size;
+
+	/**
+	 * @flags:
+	 *
+	 * A bitmask of flags that influence the creation of GEM objects:
+	 *
+	 * DRM_TEGRA_GEM_CREATE_TILED
+	 *   Use the 16x16 tiling format for this buffer.
+	 *
+	 * DRM_TEGRA_GEM_CREATE_BOTTOM_UP
+	 *   The buffer has a bottom-up layout.
+	 */
 	__u32 flags;
+
+	/**
+	 * @handle:
+	 *
+	 * The handle of the created GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 handle;
 };
 
+/**
+ * struct drm_tegra_gem_mmap - parameters for the GEM mmap IOCTL
+ */
 struct drm_tegra_gem_mmap {
+	/**
+	 * @handle:
+	 *
+	 * Handle of the GEM object to obtain an mmap offset for.
+	 */
 	__u32 handle;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @offset:
+	 *
+	 * The mmap offset for the given GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u64 offset;
 };
 
+/**
+ * struct drm_tegra_syncpt_read - parameters for the read syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_read {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to read the current value from.
+	 */
 	__u32 id;
+
+	/**
+	 * @value:
+	 *
+	 * The current syncpoint value. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 value;
 };
 
+/**
+ * struct drm_tegra_syncpt_incr - parameters for the increment syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_incr {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to increment.
+	 */
 	__u32 id;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_syncpt_wait - parameters for the wait syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_wait {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to wait on.
+	 */
 	__u32 id;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to wait.
+	 */
 	__u32 thresh;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, to wait.
+	 */
 	__u32 timeout;
+
+	/**
+	 * @value:
+	 *
+	 * The new syncpoint value after the wait. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 value;
 };
 
 #define DRM_TEGRA_NO_TIMEOUT	(0xffffffff)
 
+/**
+ * struct drm_tegra_open_channel - parameters for the open channel IOCTL
+ */
 struct drm_tegra_open_channel {
+	/**
+	 * @client:
+	 *
+	 * The client ID for this channel.
+	 */
 	__u32 client;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. Set by the kernel upon
+	 * successful completion of the IOCTL. This context needs to be passed
+	 * to the DRM_TEGRA_CHANNEL_CLOSE or the DRM_TEGRA_SUBMIT IOCTLs.
+	 */
 	__u64 context;
 };
 
+/**
+ * struct drm_tegra_close_channel - parameters for the close channel IOCTL
+ */
 struct drm_tegra_close_channel {
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. This is obtained from the
+	 * DRM_TEGRA_OPEN_CHANNEL IOCTL.
+	 */
 	__u64 context;
 };
 
+/**
+ * struct drm_tegra_get_syncpt - parameters for the get syncpoint IOCTL
+ */
 struct drm_tegra_get_syncpt {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel for which to obtain
+	 * the syncpoint ID.
+	 */
 	__u64 context;
+
+	/**
+	 * @index:
+	 *
+	 * Index of the client syncpoint for which to obtain the ID.
+	 */
 	__u32 index;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the given syncpoint. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 id;
 };
 
+/**
+ * struct drm_tegra_get_syncpt_base - parameters for the get wait base IOCTL
+ */
 struct drm_tegra_get_syncpt_base {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying for which channel to obtain the
+	 * wait base.
+	 */
 	__u64 context;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint for which to obtain the wait base.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the wait base corresponding to the client syncpoint. Set
+	 * by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 id;
 };
 
+/**
+ * struct drm_tegra_syncpt - syncpoint increment operation
+ */
 struct drm_tegra_syncpt {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to operate on.
+	 */
 	__u32 id;
+
+	/**
+	 * @incrs:
+	 *
+	 * Number of increments to perform for the syncpoint.
+	 */
 	__u32 incrs;
 };
 
+/**
+ * struct drm_tegra_cmdbuf - structure describing a command buffer
+ */
 struct drm_tegra_cmdbuf {
+	/**
+	 * @handle:
+	 *
+	 * Handle to a GEM object containing the command buffer.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, into the GEM object identified by @handle at
+	 * which the command buffer starts.
+	 */
 	__u32 offset;
+
+	/**
+	 * @words:
+	 *
+	 * Number of 32-bit words in this command buffer.
+	 */
 	__u32 words;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_reloc - GEM object relocation structure
+ */
 struct drm_tegra_reloc {
 	struct {
+		/**
+		 * @cmdbuf.handle:
+		 *
+		 * Handle to the GEM object containing the command buffer for
+		 * which to perform this GEM object relocation.
+		 */
 		__u32 handle;
+
+		/**
+		 * @cmdbuf.offset:
+		 *
+		 * Offset, in bytes, into the command buffer at which to
+		 * insert the relocated address.
+		 */
 		__u32 offset;
 	} cmdbuf;
 	struct {
+		/**
+		 * @target.handle:
+		 *
+		 * Handle to the GEM object to be relocated.
+		 */
 		__u32 handle;
+
+		/**
+		 * @target.offset:
+		 *
+		 * Offset, in bytes, into the target GEM object at which the
+		 * relocated data starts.
+		 */
 		__u32 offset;
 	} target;
+
+	/**
+	 * @shift:
+	 *
+	 * The number of bits by which to shift relocated addresses.
+	 */
 	__u32 shift;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_waitchk - wait check structure
+ */
 struct drm_tegra_waitchk {
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object containing a command stream on which to
+	 * perform the wait check.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, of the location in the command stream to perform
+	 * the wait check on.
+	 */
 	__u32 offset;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint to wait check.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to check.
+	 */
 	__u32 thresh;
 };
 
+/**
+ * struct drm_tegra_submit - job submission structure
+ */
 struct drm_tegra_submit {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel to use for the
+	 * execution of this job.
+	 */
 	__u64 context;
+
+	/**
+	 * @num_syncpts:
+	 *
+	 * The number of syncpoints operated on by this job. This defines the
+	 * length of the array pointed to by @syncpts.
+	 */
 	__u32 num_syncpts;
+
+	/**
+	 * @num_cmdbufs:
+	 *
+	 * The number of command buffers to execute as part of this job. This
+	 * defines the length of the array pointed to by @cmdbufs.
+	 */
 	__u32 num_cmdbufs;
+
+	/**
+	 * @num_relocs:
+	 *
+	 * The number of relocations to perform before executing this job.
+	 * This defines the length of the array pointed to by @relocs.
+	 */
 	__u32 num_relocs;
+
+	/**
+	 * @num_waitchks:
+	 *
+	 * The number of wait checks to perform as part of this job. This
+	 * defines the length of the array pointed to by @waitchks.
+	 */
 	__u32 num_waitchks;
+
+	/**
+	 * @waitchk_mask:
+	 *
+	 * Bitmask of valid wait checks.
+	 */
 	__u32 waitchk_mask;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, before this job is cancelled.
+	 */
 	__u32 timeout;
+
+	/**
+	 * @syncpts:
+	 *
+	 * A pointer to an array of &struct drm_tegra_syncpt structures that
+	 * specify the syncpoint operations performed as part of this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_syncpts.
+	 */
 	__u64 syncpts;
+
+	/**
+	 * @cmdbufs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_cmdbuf structures that
+	 * define the command buffers to execute as part of this job. The
+	 * number of elements in the array must be equal to the value given
+	 * by @num_syncpts.
+	 */
 	__u64 cmdbufs;
+
+	/**
+	 * @relocs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_reloc structures that
+	 * specify the relocations that need to be performed before executing
+	 * this job. The number of elements in the array must be equal to the
+	 * value given by @num_relocs.
+	 */
 	__u64 relocs;
+
+	/**
+	 * @waitchks:
+	 *
+	 * A pointer to an array of &struct drm_tegra_waitchk structures that
+	 * specify the wait checks to be performed while executing this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_waitchks.
+	 */
 	__u64 waitchks;
-	__u32 fence;		/* Return value */
 
-	__u32 reserved[5];	/* future expansion */
+	/**
+	 * @fence:
+	 *
+	 * The threshold of the syncpoint associated with this job after it
+	 * has been completed. Set by the kernel upon successful completion of
+	 * the IOCTL. This can be used with the DRM_TEGRA_SYNCPT_WAIT IOCTL to
+	 * wait for this job to be finished.
+	 */
+	__u32 fence;
+
+	/**
+	 * @reserved:
+	 *
+	 * This field is reserved for future use. Must be 0.
+	 */
+	__u32 reserved[5];
 };
 
 #define DRM_TEGRA_GEM_TILING_MODE_PITCH 0
 #define DRM_TEGRA_GEM_TILING_MODE_TILED 1
 #define DRM_TEGRA_GEM_TILING_MODE_BLOCK 2
 
+/**
+ * struct drm_tegra_gem_set_tiling - parameters for the set tiling IOCTL
+ */
 struct drm_tegra_gem_set_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the tiling parameters.
+	 */
 	__u32 handle;
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode to set. Must be one of:
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_PITCH
+	 *   pitch linear format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_TILED
+	 *   16x16 tiling format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_BLOCK
+	 *   16Bx2 tiling format
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The value to set for the tiling mode parameter.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
+/**
+ * struct drm_tegra_gem_get_tiling - parameters for the get tiling IOCTL
+ */
 struct drm_tegra_gem_get_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the tiling parameters.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode currently associated with the GEM object. Set by
+	 * the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The tiling mode parameter currently associated with the GEM object.
+	 * Set by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };
 
 #define DRM_TEGRA_GEM_BOTTOM_UP		(1 << 0)
 #define DRM_TEGRA_GEM_FLAGS		(DRM_TEGRA_GEM_BOTTOM_UP)
 
+/**
+ * struct drm_tegra_gem_set_flags - parameters for the set flags IOCTL
+ */
 struct drm_tegra_gem_set_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags to set for the GEM object.
+	 */
 	__u32 flags;
 };
 
+/**
+ * struct drm_tegra_gem_get_flags - parameters for the get flags IOCTL
+ */
 struct drm_tegra_gem_get_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags currently associated with the GEM object. Set by the
+	 * kernel upon successful completion of the IOCTL.
+	 */
 	__u32 flags;
 };
 
@@ -193,7 +665,7 @@ struct drm_tegra_gem_get_flags {
 #define DRM_IOCTL_TEGRA_SYNCPT_INCR DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_INCR, struct drm_tegra_syncpt_incr)
 #define DRM_IOCTL_TEGRA_SYNCPT_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_WAIT, struct drm_tegra_syncpt_wait)
 #define DRM_IOCTL_TEGRA_OPEN_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_OPEN_CHANNEL, struct drm_tegra_open_channel)
-#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_open_channel)
+#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_close_channel)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT, struct drm_tegra_get_syncpt)
 #define DRM_IOCTL_TEGRA_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SUBMIT, struct drm_tegra_submit)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT_BASE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT_BASE, struct drm_tegra_get_syncpt_base)
diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h
new file mode 100644
index 000000000000..f446656d00b1
--- /dev/null
+++ b/include/drm-uapi/v3d_drm.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2014-2018 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _V3D_DRM_H_
+#define _V3D_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_V3D_SUBMIT_CL                         0x00
+#define DRM_V3D_WAIT_BO                           0x01
+#define DRM_V3D_CREATE_BO                         0x02
+#define DRM_V3D_MMAP_BO                           0x03
+#define DRM_V3D_GET_PARAM                         0x04
+#define DRM_V3D_GET_BO_OFFSET                     0x05
+
+#define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
+#define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
+#define DRM_IOCTL_V3D_CREATE_BO           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo)
+#define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
+#define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
+#define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
+
+/**
+ * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
+ * engine.
+ *
+ * This asks the kernel to have the GPU execute an optional binner
+ * command list, and a render command list.
+ */
+struct drm_v3d_submit_cl {
+	/* Pointer to the binner command list.
+	 *
+	 * This is the first set of commands executed, which runs the
+	 * coordinate shader to determine where primitives land on the screen,
+	 * then writes out the state updates and draw calls necessary per tile
+	 * to the tile allocation BO.
+	 *
+	 * This BCL will block on any previous BCL submitted on the
+	 * same FD, but not on any RCL or BCLs submitted by other
+	 * clients -- that is left up to the submitter to control
+	 * using in_sync_bcl if necessary.
+	 */
+	__u32 bcl_start;
+
+	 /** End address of the BCL (first byte after the BCL) */
+	__u32 bcl_end;
+
+	/* Offset of the render command list.
+	 *
+	 * This is the second set of commands executed, which will either
+	 * execute the tiles that have been set up by the BCL, or a fixed set
+	 * of tiles (in the case of RCL-only blits).
+	 *
+	 * This RCL will block on this submit's BCL, and any previous
+	 * RCL submitted on the same FD, but not on any RCL or BCLs
+	 * submitted by other clients -- that is left up to the
+	 * submitter to control using in_sync_rcl if necessary.
+	 */
+	__u32 rcl_start;
+
+	 /** End address of the RCL (first byte after the RCL) */
+	__u32 rcl_end;
+
+	/** An optional sync object to wait on before starting the BCL. */
+	__u32 in_sync_bcl;
+	/** An optional sync object to wait on before starting the RCL. */
+	__u32 in_sync_rcl;
+	/** An optional sync object to place the completion fence in. */
+	__u32 out_sync;
+
+	/* Offset of the tile alloc memory
+	 *
+	 * This is optional on V3D 3.3 (where the CL can set the value) but
+	 * required on V3D 4.1.
+	 */
+	__u32 qma;
+
+	/** Size of the tile alloc memory. */
+	__u32 qms;
+
+	/** Offset of the tile state data array. */
+	__u32 qts;
+
+	/* Pointer to a u32 array of the BOs that are referenced by the job.
+	 */
+	__u64 bo_handles;
+
+	/* Number of BO handles passed in (size is that times 4). */
+	__u32 bo_handle_count;
+
+	/* Pad, must be zero-filled. */
+	__u32 pad;
+};
+
+/**
+ * struct drm_v3d_wait_bo - ioctl argument for waiting for
+ * completion of the last DRM_V3D_SUBMIT_CL on a BO.
+ *
+ * This is useful for cases where multiple processes might be
+ * rendering to a BO and you want to wait for all rendering to be
+ * completed.
+ */
+struct drm_v3d_wait_bo {
+	__u32 handle;
+	__u32 pad;
+	__u64 timeout_ns;
+};
+
+/**
+ * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_v3d_create_bo {
+	__u32 size;
+	__u32 flags;
+	/** Returned GEM handle for the BO. */
+	__u32 handle;
+	/**
+	 * Returned offset for the BO in the V3D address space.  This offset
+	 * is private to the DRM fd and is valid for the lifetime of the GEM
+	 * handle.
+	 *
+	 * This offset value will always be nonzero, since various HW
+	 * units treat 0 specially.
+	 */
+	__u32 offset;
+};
+
+/**
+ * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs.
+ *
+ * This doesn't actually perform an mmap.  Instead, it returns the
+ * offset you need to use in an mmap on the DRM device node.  This
+ * means that tools like valgrind end up knowing about the mapped
+ * memory.
+ *
+ * There are currently no values for the flags argument, but it may be
+ * used in a future extension.
+ */
+struct drm_v3d_mmap_bo {
+	/** Handle for the object being mapped. */
+	__u32 handle;
+	__u32 flags;
+	/** offset into the drm node to use for subsequent mmap call. */
+	__u64 offset;
+};
+
+enum drm_v3d_param {
+	DRM_V3D_PARAM_V3D_UIFCFG,
+	DRM_V3D_PARAM_V3D_HUB_IDENT1,
+	DRM_V3D_PARAM_V3D_HUB_IDENT2,
+	DRM_V3D_PARAM_V3D_HUB_IDENT3,
+	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
+	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
+	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
+};
+
+struct drm_v3d_get_param {
+	__u32 param;
+	__u32 pad;
+	__u64 value;
+};
+
+/**
+ * Returns the offset for the BO in the V3D address space for this DRM fd.
+ * This is the same value returned by drm_v3d_create_bo, if that was called
+ * from this DRM fd.
+ */
+struct drm_v3d_get_bo_offset {
+	__u32 handle;
+	__u32 offset;
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _V3D_DRM_H_ */
diff --git a/include/drm-uapi/vc4_drm.h b/include/drm-uapi/vc4_drm.h
index 4117117b4204..31f50de39acb 100644
--- a/include/drm-uapi/vc4_drm.h
+++ b/include/drm-uapi/vc4_drm.h
@@ -183,10 +183,17 @@ struct drm_vc4_submit_cl {
 	/* ID of the perfmon to attach to this job. 0 means no perfmon. */
 	__u32 perfmonid;
 
-	/* Unused field to align this struct on 64 bits. Must be set to 0.
-	 * If one ever needs to add an u32 field to this struct, this field
-	 * can be used.
+	/* Syncobj handle to wait on. If set, processing of this render job
+	 * will not start until the syncobj is signaled. 0 means ignore.
 	 */
+	__u32 in_sync;
+
+	/* Syncobj handle to export fence to. If set, the fence in the syncobj
+	 * will be replaced with a fence that signals upon completion of this
+	 * render job. 0 means ignore.
+	 */
+	__u32 out_sync;
+
 	__u32 pad2;
 };
 
diff --git a/include/drm-uapi/virtgpu_drm.h b/include/drm-uapi/virtgpu_drm.h
index 91a31ffed828..9a781f0611df 100644
--- a/include/drm-uapi/virtgpu_drm.h
+++ b/include/drm-uapi/virtgpu_drm.h
@@ -63,6 +63,7 @@ struct drm_virtgpu_execbuffer {
 };
 
 #define VIRTGPU_PARAM_3D_FEATURES 1 /* do we have 3D features in the hw */
+#define VIRTGPU_PARAM_CAPSET_QUERY_FIX 2 /* do we have the capset fix */
 
 struct drm_virtgpu_getparam {
 	__u64 param;
diff --git a/include/drm-uapi/vmwgfx_drm.h b/include/drm-uapi/vmwgfx_drm.h
index 0bc784f5e0db..399f58317cff 100644
--- a/include/drm-uapi/vmwgfx_drm.h
+++ b/include/drm-uapi/vmwgfx_drm.h
@@ -40,6 +40,7 @@ extern "C" {
 
 #define DRM_VMW_GET_PARAM            0
 #define DRM_VMW_ALLOC_DMABUF         1
+#define DRM_VMW_ALLOC_BO             1
 #define DRM_VMW_UNREF_DMABUF         2
 #define DRM_VMW_HANDLE_CLOSE         2
 #define DRM_VMW_CURSOR_BYPASS        3
@@ -68,6 +69,8 @@ extern "C" {
 #define DRM_VMW_GB_SURFACE_REF       24
 #define DRM_VMW_SYNCCPU              25
 #define DRM_VMW_CREATE_EXTENDED_CONTEXT 26
+#define DRM_VMW_GB_SURFACE_CREATE_EXT   27
+#define DRM_VMW_GB_SURFACE_REF_EXT      28
 
 /*************************************************************************/
 /**
@@ -79,6 +82,9 @@ extern "C" {
  *
  * DRM_VMW_PARAM_OVERLAY_IOCTL:
  * Does the driver support the overlay ioctl.
+ *
+ * DRM_VMW_PARAM_SM4_1
+ * SM4_1 support is enabled.
  */
 
 #define DRM_VMW_PARAM_NUM_STREAMS      0
@@ -94,6 +100,8 @@ extern "C" {
 #define DRM_VMW_PARAM_MAX_MOB_SIZE     10
 #define DRM_VMW_PARAM_SCREEN_TARGET    11
 #define DRM_VMW_PARAM_DX               12
+#define DRM_VMW_PARAM_HW_CAPS2         13
+#define DRM_VMW_PARAM_SM4_1            14
 
 /**
  * enum drm_vmw_handle_type - handle type for ref ioctls
@@ -356,9 +364,9 @@ struct drm_vmw_fence_rep {
 
 /*************************************************************************/
 /**
- * DRM_VMW_ALLOC_DMABUF
+ * DRM_VMW_ALLOC_BO
  *
- * Allocate a DMA buffer that is visible also to the host.
+ * Allocate a buffer object that is visible also to the host.
  * NOTE: The buffer is
  * identified by a handle and an offset, which are private to the guest, but
  * useable in the command stream. The guest kernel may translate these
@@ -366,27 +374,28 @@ struct drm_vmw_fence_rep {
  * be zero at all times, or it may disappear from the interface before it is
  * fixed.
  *
- * The DMA buffer may stay user-space mapped in the guest at all times,
+ * The buffer object may stay user-space mapped in the guest at all times,
  * and is thus suitable for sub-allocation.
  *
- * DMA buffers are mapped using the mmap() syscall on the drm device.
+ * Buffer objects are mapped using the mmap() syscall on the drm device.
  */
 
 /**
- * struct drm_vmw_alloc_dmabuf_req
+ * struct drm_vmw_alloc_bo_req
  *
  * @size: Required minimum size of the buffer.
  *
- * Input data to the DRM_VMW_ALLOC_DMABUF Ioctl.
+ * Input data to the DRM_VMW_ALLOC_BO Ioctl.
  */
 
-struct drm_vmw_alloc_dmabuf_req {
+struct drm_vmw_alloc_bo_req {
 	__u32 size;
 	__u32 pad64;
 };
+#define drm_vmw_alloc_dmabuf_req drm_vmw_alloc_bo_req
 
 /**
- * struct drm_vmw_dmabuf_rep
+ * struct drm_vmw_bo_rep
  *
  * @map_handle: Offset to use in the mmap() call used to map the buffer.
  * @handle: Handle unique to this buffer. Used for unreferencing.
@@ -395,50 +404,32 @@ struct drm_vmw_alloc_dmabuf_req {
  * @cur_gmr_offset: Offset to use in the command stream when this buffer is
  * referenced. See note above.
  *
- * Output data from the DRM_VMW_ALLOC_DMABUF Ioctl.
+ * Output data from the DRM_VMW_ALLOC_BO Ioctl.
  */
 
-struct drm_vmw_dmabuf_rep {
+struct drm_vmw_bo_rep {
 	__u64 map_handle;
 	__u32 handle;
 	__u32 cur_gmr_id;
 	__u32 cur_gmr_offset;
 	__u32 pad64;
 };
+#define drm_vmw_dmabuf_rep drm_vmw_bo_rep
 
 /**
- * union drm_vmw_dmabuf_arg
+ * union drm_vmw_alloc_bo_arg
  *
  * @req: Input data as described above.
  * @rep: Output data as described above.
  *
- * Argument to the DRM_VMW_ALLOC_DMABUF Ioctl.
+ * Argument to the DRM_VMW_ALLOC_BO Ioctl.
  */
 
-union drm_vmw_alloc_dmabuf_arg {
-	struct drm_vmw_alloc_dmabuf_req req;
-	struct drm_vmw_dmabuf_rep rep;
-};
-
-/*************************************************************************/
-/**
- * DRM_VMW_UNREF_DMABUF - Free a DMA buffer.
- *
- */
-
-/**
- * struct drm_vmw_unref_dmabuf_arg
- *
- * @handle: Handle indicating what buffer to free. Obtained from the
- * DRM_VMW_ALLOC_DMABUF Ioctl.
- *
- * Argument to the DRM_VMW_UNREF_DMABUF Ioctl.
- */
-
-struct drm_vmw_unref_dmabuf_arg {
-	__u32 handle;
-	__u32 pad64;
+union drm_vmw_alloc_bo_arg {
+	struct drm_vmw_alloc_bo_req req;
+	struct drm_vmw_bo_rep rep;
 };
+#define drm_vmw_alloc_dmabuf_arg drm_vmw_alloc_bo_arg
 
 /*************************************************************************/
 /**
@@ -1103,9 +1094,8 @@ union drm_vmw_extended_context_arg {
  * DRM_VMW_HANDLE_CLOSE - Close a user-space handle and release its
  * underlying resource.
  *
- * Note that this ioctl is overlaid on the DRM_VMW_UNREF_DMABUF Ioctl.
- * The ioctl arguments therefore need to be identical in layout.
- *
+ * Note that this ioctl is overlaid on the deprecated DRM_VMW_UNREF_DMABUF
+ * Ioctl.
  */
 
 /**
@@ -1119,7 +1109,107 @@ struct drm_vmw_handle_close_arg {
 	__u32 handle;
 	__u32 pad64;
 };
+#define drm_vmw_unref_dmabuf_arg drm_vmw_handle_close_arg
+
+/*************************************************************************/
+/**
+ * DRM_VMW_GB_SURFACE_CREATE_EXT - Create a host guest-backed surface.
+ *
+ * Allocates a surface handle and queues a create surface command
+ * for the host on the first use of the surface. The surface ID can
+ * be used as the surface ID in commands referencing the surface.
+ *
+ * This new command extends DRM_VMW_GB_SURFACE_CREATE by adding version
+ * parameter and 64 bit svga flag.
+ */
+
+/**
+ * enum drm_vmw_surface_version
+ *
+ * @drm_vmw_surface_gb_v1: Corresponds to current gb surface format with
+ * svga3d surface flags split into 2, upper half and lower half.
+ */
+enum drm_vmw_surface_version {
+	drm_vmw_gb_surface_v1
+};
+
+/**
+ * struct drm_vmw_gb_surface_create_ext_req
+ *
+ * @base: Surface create parameters.
+ * @version: Version of surface create ioctl.
+ * @svga3d_flags_upper_32_bits: Upper 32 bits of svga3d flags.
+ * @multisample_pattern: Multisampling pattern when msaa is supported.
+ * @quality_level: Precision settings for each sample.
+ * @must_be_zero: Reserved for future usage.
+ *
+ * Input argument to the  DRM_VMW_GB_SURFACE_CREATE_EXT Ioctl.
+ * Part of output argument for the DRM_VMW_GB_SURFACE_REF_EXT Ioctl.
+ */
+struct drm_vmw_gb_surface_create_ext_req {
+	struct drm_vmw_gb_surface_create_req base;
+	enum drm_vmw_surface_version version;
+	uint32_t svga3d_flags_upper_32_bits;
+	SVGA3dMSPattern multisample_pattern;
+	SVGA3dMSQualityLevel quality_level;
+	uint64_t must_be_zero;
+};
+
+/**
+ * union drm_vmw_gb_surface_create_ext_arg
+ *
+ * @req: Input argument as described above.
+ * @rep: Output argument as described above.
+ *
+ * Argument to the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl.
+ */
+union drm_vmw_gb_surface_create_ext_arg {
+	struct drm_vmw_gb_surface_create_rep rep;
+	struct drm_vmw_gb_surface_create_ext_req req;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_GB_SURFACE_REF_EXT - Reference a host surface.
+ *
+ * Puts a reference on a host surface with a given handle, as previously
+ * returned by the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl.
+ * A reference will make sure the surface isn't destroyed while we hold
+ * it and will allow the calling client to use the surface handle in
+ * the command stream.
+ *
+ * On successful return, the Ioctl returns the surface information given
+ * to and returned from the DRM_VMW_GB_SURFACE_CREATE_EXT ioctl.
+ */
 
+/**
+ * struct drm_vmw_gb_surface_ref_ext_rep
+ *
+ * @creq: The data used as input when the surface was created, as described
+ *        above at "struct drm_vmw_gb_surface_create_ext_req"
+ * @crep: Additional data output when the surface was created, as described
+ *        above at "struct drm_vmw_gb_surface_create_rep"
+ *
+ * Output Argument to the DRM_VMW_GB_SURFACE_REF_EXT ioctl.
+ */
+struct drm_vmw_gb_surface_ref_ext_rep {
+	struct drm_vmw_gb_surface_create_ext_req creq;
+	struct drm_vmw_gb_surface_create_rep crep;
+};
+
+/**
+ * union drm_vmw_gb_surface_reference_ext_arg
+ *
+ * @req: Input data as described above at "struct drm_vmw_surface_arg"
+ * @rep: Output data as described above at
+ *       "struct drm_vmw_gb_surface_ref_ext_rep"
+ *
+ * Argument to the DRM_VMW_GB_SURFACE_REF Ioctl.
+ */
+union drm_vmw_gb_surface_reference_ext_arg {
+	struct drm_vmw_gb_surface_ref_ext_rep rep;
+	struct drm_vmw_surface_arg req;
+};
 
 #if defined(__cplusplus)
 }
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 1/4] headers: bump
  2018-12-13 12:06 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2018-12-13 12:06 ` Tvrtko Ursulin
  0 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2018-12-13 12:06 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

---
 include/drm-uapi/drm_mode.h | 19 ++++++++++++++++
 include/drm-uapi/i915_drm.h | 43 +++++++++++++++++++++++++++++++++++++
 include/drm-uapi/msm_drm.h  | 25 +++++++++++++++------
 include/drm-uapi/v3d_drm.h  | 33 ++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/include/drm-uapi/drm_mode.h b/include/drm-uapi/drm_mode.h
index d3e0fe31efc5..a439c2e67896 100644
--- a/include/drm-uapi/drm_mode.h
+++ b/include/drm-uapi/drm_mode.h
@@ -888,6 +888,25 @@ struct drm_mode_revoke_lease {
 	__u32 lessee_id;
 };
 
+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * With drm subsystem using struct drm_rect to manage rectangular area this
+ * export it to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+	__s32 x1;
+	__s32 y1;
+	__s32 x2;
+	__s32 y2;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index e39b26d4bb3d..6b43c34bc436 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -1486,9 +1486,52 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd1;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd2;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
diff --git a/include/drm-uapi/msm_drm.h b/include/drm-uapi/msm_drm.h
index c06d0a5bdd80..91a16b333c69 100644
--- a/include/drm-uapi/msm_drm.h
+++ b/include/drm-uapi/msm_drm.h
@@ -105,14 +105,24 @@ struct drm_msm_gem_new {
 	__u32 handle;         /* out */
 };
 
-#define MSM_INFO_IOVA	0x01
-
-#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+/* Get or set GEM buffer info.  The requested value can be passed
+ * directly in 'value', or for data larger than 64b 'value' is a
+ * pointer to userspace buffer, with 'len' specifying the number of
+ * bytes copied into that buffer.  For info returned by pointer,
+ * calling the GEM_INFO ioctl with null 'value' will return the
+ * required buffer size in 'len'
+ */
+#define MSM_INFO_GET_OFFSET	0x00   /* get mmap() offset, returned by value */
+#define MSM_INFO_GET_IOVA	0x01   /* get iova, returned by value */
+#define MSM_INFO_SET_NAME	0x02   /* set the debug name (by pointer) */
+#define MSM_INFO_GET_NAME	0x03   /* get debug name, returned by pointer */
 
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-	__u32 flags;	      /* in - combination of MSM_INFO_* flags */
-	__u64 offset;         /* out, mmap() offset or iova */
+	__u32 info;           /* in - one of MSM_INFO_* */
+	__u64 value;          /* in or out */
+	__u32 len;            /* in or out */
+	__u32 pad;
 };
 
 #define MSM_PREP_READ        0x01
@@ -188,8 +198,11 @@ struct drm_msm_gem_submit_cmd {
  */
 #define MSM_SUBMIT_BO_READ             0x0001
 #define MSM_SUBMIT_BO_WRITE            0x0002
+#define MSM_SUBMIT_BO_DUMP             0x0004
 
-#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
+#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | \
+					MSM_SUBMIT_BO_WRITE | \
+					MSM_SUBMIT_BO_DUMP)
 
 struct drm_msm_gem_submit_bo {
 	__u32 flags;          /* in, mask of MSM_SUBMIT_BO_x */
diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h
index b1e5de076b0f..ea70669d2138 100644
--- a/include/drm-uapi/v3d_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -36,6 +36,7 @@ extern "C" {
 #define DRM_V3D_MMAP_BO                           0x03
 #define DRM_V3D_GET_PARAM                         0x04
 #define DRM_V3D_GET_BO_OFFSET                     0x05
+#define DRM_V3D_SUBMIT_TFU                        0x06
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -43,6 +44,7 @@ extern "C" {
 #define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
 #define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
 #define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
+#define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
@@ -50,6 +52,14 @@ extern "C" {
  *
  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
+ *
+ * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
+ * each CL executes.  The VCD cache should be flushed (if necessary)
+ * by the submitted CLs.  The TLB writes are guaranteed to have been
+ * flushed by the time the render done IRQ happens, which is the
+ * trigger for out_sync.  Any dirtying of cachelines by the job (only
+ * possible using TMU writes) must be flushed by the caller using the
+ * CL's cache flush commands.
  */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
@@ -179,6 +189,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
+	DRM_V3D_PARAM_SUPPORTS_TFU,
 };
 
 struct drm_v3d_get_param {
@@ -197,6 +208,28 @@ struct drm_v3d_get_bo_offset {
 	__u32 offset;
 };
 
+struct drm_v3d_submit_tfu {
+	__u32 icfg;
+	__u32 iia;
+	__u32 iis;
+	__u32 ica;
+	__u32 iua;
+	__u32 ioa;
+	__u32 ios;
+	__u32 coef[4];
+	/* First handle is the output BO, following are other inputs.
+	 * 0 for unused.
+	 */
+	__u32 bo_handles[4];
+	/* sync object to block on before running the TFU job.  Each TFU
+	 * job will execute in the order submitted to its FD.  Synchronization
+	 * against rendering jobs requires using sync objects.
+	 */
+	__u32 in_sync;
+	/* Sync object to signal when the TFU job is done. */
+	__u32 out_sync;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 1/4] headers: bump
  2019-01-08 11:24 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-08 11:24 ` Tvrtko Ursulin
  0 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 11:24 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

---
 include/drm-uapi/drm_fourcc.h |  2 +-
 include/drm-uapi/drm_mode.h   | 19 ++++++++++++++++
 include/drm-uapi/i915_drm.h   | 43 +++++++++++++++++++++++++++++++++++
 include/drm-uapi/msm_drm.h    | 25 +++++++++++++++-----
 include/drm-uapi/v3d_drm.h    | 33 +++++++++++++++++++++++++++
 5 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h
index 4ddf754bab09..0b44260a5ee9 100644
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -151,7 +151,7 @@ extern "C" {
 #define DRM_FORMAT_VYUY		fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
 
 #define DRM_FORMAT_AYUV		fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
-#define DRM_FORMAT_XYUV8888	fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XYUV8888		fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
 
 /*
  * packed YCbCr420 2x2 tiled formats
diff --git a/include/drm-uapi/drm_mode.h b/include/drm-uapi/drm_mode.h
index d3e0fe31efc5..a439c2e67896 100644
--- a/include/drm-uapi/drm_mode.h
+++ b/include/drm-uapi/drm_mode.h
@@ -888,6 +888,25 @@ struct drm_mode_revoke_lease {
 	__u32 lessee_id;
 };
 
+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * With drm subsystem using struct drm_rect to manage rectangular area this
+ * export it to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+	__s32 x1;
+	__s32 y1;
+	__s32 x2;
+	__s32 y2;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index e39b26d4bb3d..bc658583a2b1 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -1486,9 +1486,52 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
diff --git a/include/drm-uapi/msm_drm.h b/include/drm-uapi/msm_drm.h
index c06d0a5bdd80..91a16b333c69 100644
--- a/include/drm-uapi/msm_drm.h
+++ b/include/drm-uapi/msm_drm.h
@@ -105,14 +105,24 @@ struct drm_msm_gem_new {
 	__u32 handle;         /* out */
 };
 
-#define MSM_INFO_IOVA	0x01
-
-#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+/* Get or set GEM buffer info.  The requested value can be passed
+ * directly in 'value', or for data larger than 64b 'value' is a
+ * pointer to userspace buffer, with 'len' specifying the number of
+ * bytes copied into that buffer.  For info returned by pointer,
+ * calling the GEM_INFO ioctl with null 'value' will return the
+ * required buffer size in 'len'
+ */
+#define MSM_INFO_GET_OFFSET	0x00   /* get mmap() offset, returned by value */
+#define MSM_INFO_GET_IOVA	0x01   /* get iova, returned by value */
+#define MSM_INFO_SET_NAME	0x02   /* set the debug name (by pointer) */
+#define MSM_INFO_GET_NAME	0x03   /* get debug name, returned by pointer */
 
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-	__u32 flags;	      /* in - combination of MSM_INFO_* flags */
-	__u64 offset;         /* out, mmap() offset or iova */
+	__u32 info;           /* in - one of MSM_INFO_* */
+	__u64 value;          /* in or out */
+	__u32 len;            /* in or out */
+	__u32 pad;
 };
 
 #define MSM_PREP_READ        0x01
@@ -188,8 +198,11 @@ struct drm_msm_gem_submit_cmd {
  */
 #define MSM_SUBMIT_BO_READ             0x0001
 #define MSM_SUBMIT_BO_WRITE            0x0002
+#define MSM_SUBMIT_BO_DUMP             0x0004
 
-#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
+#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | \
+					MSM_SUBMIT_BO_WRITE | \
+					MSM_SUBMIT_BO_DUMP)
 
 struct drm_msm_gem_submit_bo {
 	__u32 flags;          /* in, mask of MSM_SUBMIT_BO_x */
diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h
index b1e5de076b0f..ea70669d2138 100644
--- a/include/drm-uapi/v3d_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -36,6 +36,7 @@ extern "C" {
 #define DRM_V3D_MMAP_BO                           0x03
 #define DRM_V3D_GET_PARAM                         0x04
 #define DRM_V3D_GET_BO_OFFSET                     0x05
+#define DRM_V3D_SUBMIT_TFU                        0x06
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -43,6 +44,7 @@ extern "C" {
 #define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
 #define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
 #define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
+#define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
@@ -50,6 +52,14 @@ extern "C" {
  *
  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
+ *
+ * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
+ * each CL executes.  The VCD cache should be flushed (if necessary)
+ * by the submitted CLs.  The TLB writes are guaranteed to have been
+ * flushed by the time the render done IRQ happens, which is the
+ * trigger for out_sync.  Any dirtying of cachelines by the job (only
+ * possible using TMU writes) must be flushed by the caller using the
+ * CL's cache flush commands.
  */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
@@ -179,6 +189,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
+	DRM_V3D_PARAM_SUPPORTS_TFU,
 };
 
 struct drm_v3d_get_param {
@@ -197,6 +208,28 @@ struct drm_v3d_get_bo_offset {
 	__u32 offset;
 };
 
+struct drm_v3d_submit_tfu {
+	__u32 icfg;
+	__u32 iia;
+	__u32 iis;
+	__u32 ica;
+	__u32 iua;
+	__u32 ioa;
+	__u32 ios;
+	__u32 coef[4];
+	/* First handle is the output BO, following are other inputs.
+	 * 0 for unused.
+	 */
+	__u32 bo_handles[4];
+	/* sync object to block on before running the TFU job.  Each TFU
+	 * job will execute in the order submitted to its FD.  Synchronization
+	 * against rendering jobs requires using sync objects.
+	 */
+	__u32 in_sync;
+	/* Sync object to signal when the TFU job is done. */
+	__u32 out_sync;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 1/4] headers: bump
  2019-01-08 15:12 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-08 15:13 ` Tvrtko Ursulin
  0 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-08 15:13 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Sync with latest drm headers from drm-tip.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/drm-uapi/drm_fourcc.h |  2 +-
 include/drm-uapi/drm_mode.h   | 19 ++++++++++++++++
 include/drm-uapi/i915_drm.h   | 43 +++++++++++++++++++++++++++++++++++
 include/drm-uapi/msm_drm.h    | 25 +++++++++++++++-----
 include/drm-uapi/v3d_drm.h    | 33 +++++++++++++++++++++++++++
 5 files changed, 115 insertions(+), 7 deletions(-)

diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h
index 4ddf754bab09..0b44260a5ee9 100644
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -151,7 +151,7 @@ extern "C" {
 #define DRM_FORMAT_VYUY		fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
 
 #define DRM_FORMAT_AYUV		fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
-#define DRM_FORMAT_XYUV8888	fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XYUV8888		fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
 
 /*
  * packed YCbCr420 2x2 tiled formats
diff --git a/include/drm-uapi/drm_mode.h b/include/drm-uapi/drm_mode.h
index d3e0fe31efc5..a439c2e67896 100644
--- a/include/drm-uapi/drm_mode.h
+++ b/include/drm-uapi/drm_mode.h
@@ -888,6 +888,25 @@ struct drm_mode_revoke_lease {
 	__u32 lessee_id;
 };
 
+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * With drm subsystem using struct drm_rect to manage rectangular area this
+ * export it to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+	__s32 x1;
+	__s32 y1;
+	__s32 x2;
+	__s32 y2;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index e39b26d4bb3d..bc658583a2b1 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -1486,9 +1486,52 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
diff --git a/include/drm-uapi/msm_drm.h b/include/drm-uapi/msm_drm.h
index c06d0a5bdd80..91a16b333c69 100644
--- a/include/drm-uapi/msm_drm.h
+++ b/include/drm-uapi/msm_drm.h
@@ -105,14 +105,24 @@ struct drm_msm_gem_new {
 	__u32 handle;         /* out */
 };
 
-#define MSM_INFO_IOVA	0x01
-
-#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+/* Get or set GEM buffer info.  The requested value can be passed
+ * directly in 'value', or for data larger than 64b 'value' is a
+ * pointer to userspace buffer, with 'len' specifying the number of
+ * bytes copied into that buffer.  For info returned by pointer,
+ * calling the GEM_INFO ioctl with null 'value' will return the
+ * required buffer size in 'len'
+ */
+#define MSM_INFO_GET_OFFSET	0x00   /* get mmap() offset, returned by value */
+#define MSM_INFO_GET_IOVA	0x01   /* get iova, returned by value */
+#define MSM_INFO_SET_NAME	0x02   /* set the debug name (by pointer) */
+#define MSM_INFO_GET_NAME	0x03   /* get debug name, returned by pointer */
 
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-	__u32 flags;	      /* in - combination of MSM_INFO_* flags */
-	__u64 offset;         /* out, mmap() offset or iova */
+	__u32 info;           /* in - one of MSM_INFO_* */
+	__u64 value;          /* in or out */
+	__u32 len;            /* in or out */
+	__u32 pad;
 };
 
 #define MSM_PREP_READ        0x01
@@ -188,8 +198,11 @@ struct drm_msm_gem_submit_cmd {
  */
 #define MSM_SUBMIT_BO_READ             0x0001
 #define MSM_SUBMIT_BO_WRITE            0x0002
+#define MSM_SUBMIT_BO_DUMP             0x0004
 
-#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
+#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | \
+					MSM_SUBMIT_BO_WRITE | \
+					MSM_SUBMIT_BO_DUMP)
 
 struct drm_msm_gem_submit_bo {
 	__u32 flags;          /* in, mask of MSM_SUBMIT_BO_x */
diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h
index b1e5de076b0f..ea70669d2138 100644
--- a/include/drm-uapi/v3d_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -36,6 +36,7 @@ extern "C" {
 #define DRM_V3D_MMAP_BO                           0x03
 #define DRM_V3D_GET_PARAM                         0x04
 #define DRM_V3D_GET_BO_OFFSET                     0x05
+#define DRM_V3D_SUBMIT_TFU                        0x06
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -43,6 +44,7 @@ extern "C" {
 #define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
 #define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
 #define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
+#define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
@@ -50,6 +52,14 @@ extern "C" {
  *
  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
+ *
+ * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
+ * each CL executes.  The VCD cache should be flushed (if necessary)
+ * by the submitted CLs.  The TLB writes are guaranteed to have been
+ * flushed by the time the render done IRQ happens, which is the
+ * trigger for out_sync.  Any dirtying of cachelines by the job (only
+ * possible using TMU writes) must be flushed by the caller using the
+ * CL's cache flush commands.
  */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
@@ -179,6 +189,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
+	DRM_V3D_PARAM_SUPPORTS_TFU,
 };
 
 struct drm_v3d_get_param {
@@ -197,6 +208,28 @@ struct drm_v3d_get_bo_offset {
 	__u32 offset;
 };
 
+struct drm_v3d_submit_tfu {
+	__u32 icfg;
+	__u32 iia;
+	__u32 iis;
+	__u32 ica;
+	__u32 iua;
+	__u32 ioa;
+	__u32 ios;
+	__u32 coef[4];
+	/* First handle is the output BO, following are other inputs.
+	 * 0 for unused.
+	 */
+	__u32 bo_handles[4];
+	/* sync object to block on before running the TFU job.  Each TFU
+	 * job will execute in the order submitted to its FD.  Synchronization
+	 * against rendering jobs requires using sync objects.
+	 */
+	__u32 in_sync;
+	/* Sync object to signal when the TFU job is done. */
+	__u32 out_sync;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating
@ 2019-01-14 10:29 Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14 10:29 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

New in this version:
 * Updated for engine_class/engine_instance uAPI rename.

Lionel Landwerlin (1):
  tests/gem_ctx_sseu: Dynamic (sub)slice programming tests

Tony Ye (2):
  tests/gem_media_vme: Simple test to exercise the VME block
  tests/gem_media_vme: Shut down half of subslices to avoid gpu hang on
    ICL

Tvrtko Ursulin (1):
  headers: bump

 include/drm-uapi/drm_fourcc.h               |   2 +-
 include/drm-uapi/drm_mode.h                 |  19 +
 include/drm-uapi/i915_drm.h                 |  64 +++
 include/drm-uapi/msm_drm.h                  |  25 +-
 include/drm-uapi/v3d_drm.h                  |  33 ++
 lib/gpu_cmds.c                              | 148 ++++++
 lib/gpu_cmds.h                              |  23 +-
 lib/i915/shaders/media/README_media_vme.txt |  65 +++
 lib/i915/shaders/media/media_vme.gxa        |  51 ++
 lib/intel_batchbuffer.c                     |   9 +
 lib/intel_batchbuffer.h                     |   7 +
 lib/media_fill.c                            | 110 ++++
 lib/media_fill.h                            |   6 +
 lib/surfaceformat.h                         |   2 +
 tests/Makefile.am                           |   1 +
 tests/Makefile.sources                      |   6 +
 tests/i915/gem_ctx_param.c                  |   4 +-
 tests/i915/gem_ctx_sseu.c                   | 541 ++++++++++++++++++++
 tests/i915/gem_media_vme.c                  | 177 +++++++
 tests/meson.build                           |   9 +
 20 files changed, 1292 insertions(+), 10 deletions(-)
 create mode 100755 lib/i915/shaders/media/README_media_vme.txt
 create mode 100755 lib/i915/shaders/media/media_vme.gxa
 create mode 100644 tests/i915/gem_ctx_sseu.c
 create mode 100644 tests/i915/gem_media_vme.c

Test-with: 20190108151252.4214-1-tvrtko.ursulin@linux.intel.com
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 1/4] headers: bump
  2019-01-14 10:29 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-14 10:29 ` Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 2/4] tests/gem_ctx_sseu: Dynamic (sub)slice programming tests Tvrtko Ursulin
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14 10:29 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Sync with latest drm headers from drm-tip.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/drm-uapi/drm_fourcc.h |  2 +-
 include/drm-uapi/drm_mode.h   | 19 +++++++++++
 include/drm-uapi/i915_drm.h   | 64 +++++++++++++++++++++++++++++++++++
 include/drm-uapi/msm_drm.h    | 25 ++++++++++----
 include/drm-uapi/v3d_drm.h    | 33 ++++++++++++++++++
 5 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h
index 4ddf754bab09..0b44260a5ee9 100644
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -151,7 +151,7 @@ extern "C" {
 #define DRM_FORMAT_VYUY		fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
 
 #define DRM_FORMAT_AYUV		fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
-#define DRM_FORMAT_XYUV8888	fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XYUV8888		fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
 
 /*
  * packed YCbCr420 2x2 tiled formats
diff --git a/include/drm-uapi/drm_mode.h b/include/drm-uapi/drm_mode.h
index d3e0fe31efc5..a439c2e67896 100644
--- a/include/drm-uapi/drm_mode.h
+++ b/include/drm-uapi/drm_mode.h
@@ -888,6 +888,25 @@ struct drm_mode_revoke_lease {
 	__u32 lessee_id;
 };
 
+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * With drm subsystem using struct drm_rect to manage rectangular area this
+ * export it to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+	__s32 x1;
+	__s32 y1;
+	__s32 x2;
+	__s32 y2;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index e39b26d4bb3d..d2792ab3640b 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reason to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by configuring SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting such a
+ * combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 engine_class;
+	__u16 engine_instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
diff --git a/include/drm-uapi/msm_drm.h b/include/drm-uapi/msm_drm.h
index c06d0a5bdd80..91a16b333c69 100644
--- a/include/drm-uapi/msm_drm.h
+++ b/include/drm-uapi/msm_drm.h
@@ -105,14 +105,24 @@ struct drm_msm_gem_new {
 	__u32 handle;         /* out */
 };
 
-#define MSM_INFO_IOVA	0x01
-
-#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+/* Get or set GEM buffer info.  The requested value can be passed
+ * directly in 'value', or for data larger than 64b 'value' is a
+ * pointer to userspace buffer, with 'len' specifying the number of
+ * bytes copied into that buffer.  For info returned by pointer,
+ * calling the GEM_INFO ioctl with null 'value' will return the
+ * required buffer size in 'len'
+ */
+#define MSM_INFO_GET_OFFSET	0x00   /* get mmap() offset, returned by value */
+#define MSM_INFO_GET_IOVA	0x01   /* get iova, returned by value */
+#define MSM_INFO_SET_NAME	0x02   /* set the debug name (by pointer) */
+#define MSM_INFO_GET_NAME	0x03   /* get debug name, returned by pointer */
 
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-	__u32 flags;	      /* in - combination of MSM_INFO_* flags */
-	__u64 offset;         /* out, mmap() offset or iova */
+	__u32 info;           /* in - one of MSM_INFO_* */
+	__u64 value;          /* in or out */
+	__u32 len;            /* in or out */
+	__u32 pad;
 };
 
 #define MSM_PREP_READ        0x01
@@ -188,8 +198,11 @@ struct drm_msm_gem_submit_cmd {
  */
 #define MSM_SUBMIT_BO_READ             0x0001
 #define MSM_SUBMIT_BO_WRITE            0x0002
+#define MSM_SUBMIT_BO_DUMP             0x0004
 
-#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
+#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | \
+					MSM_SUBMIT_BO_WRITE | \
+					MSM_SUBMIT_BO_DUMP)
 
 struct drm_msm_gem_submit_bo {
 	__u32 flags;          /* in, mask of MSM_SUBMIT_BO_x */
diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h
index b1e5de076b0f..ea70669d2138 100644
--- a/include/drm-uapi/v3d_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -36,6 +36,7 @@ extern "C" {
 #define DRM_V3D_MMAP_BO                           0x03
 #define DRM_V3D_GET_PARAM                         0x04
 #define DRM_V3D_GET_BO_OFFSET                     0x05
+#define DRM_V3D_SUBMIT_TFU                        0x06
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -43,6 +44,7 @@ extern "C" {
 #define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
 #define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
 #define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
+#define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
@@ -50,6 +52,14 @@ extern "C" {
  *
  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
+ *
+ * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
+ * each CL executes.  The VCD cache should be flushed (if necessary)
+ * by the submitted CLs.  The TLB writes are guaranteed to have been
+ * flushed by the time the render done IRQ happens, which is the
+ * trigger for out_sync.  Any dirtying of cachelines by the job (only
+ * possible using TMU writes) must be flushed by the caller using the
+ * CL's cache flush commands.
  */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
@@ -179,6 +189,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
+	DRM_V3D_PARAM_SUPPORTS_TFU,
 };
 
 struct drm_v3d_get_param {
@@ -197,6 +208,28 @@ struct drm_v3d_get_bo_offset {
 	__u32 offset;
 };
 
+struct drm_v3d_submit_tfu {
+	__u32 icfg;
+	__u32 iia;
+	__u32 iis;
+	__u32 ica;
+	__u32 iua;
+	__u32 ioa;
+	__u32 ios;
+	__u32 coef[4];
+	/* First handle is the output BO, following are other inputs.
+	 * 0 for unused.
+	 */
+	__u32 bo_handles[4];
+	/* sync object to block on before running the TFU job.  Each TFU
+	 * job will execute in the order submitted to its FD.  Synchronization
+	 * against rendering jobs requires using sync objects.
+	 */
+	__u32 in_sync;
+	/* Sync object to signal when the TFU job is done. */
+	__u32 out_sync;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 2/4] tests/gem_ctx_sseu: Dynamic (sub)slice programming tests
  2019-01-14 10:29 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
@ 2019-01-14 10:29 ` Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 3/4] tests/gem_media_vme: Simple test to exercise the VME block Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 4/4] tests/gem_media_vme: Shut down half of subslices to avoid gpu hang on ICL Tvrtko Ursulin
  3 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14 10:29 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Verify that the per-context dynamic SSEU uAPI works as expected.

v2: Add subslice tests (Lionel)
    Use MI_SET_PREDICATE for further verification when available (Lionel)

v3: Rename to gem_ctx_rpcs (Lionel)

v4: Update kernel API (Lionel)
    Add 0 value test (Lionel)
    Exercise invalid values (Lionel)

v5: Add perf tests (Lionel)

v6: Add new sysfs entry tests (Lionel)

v7: Test rsvd fields
    Update for kernel series changes

v8: Drop test_no_sseu_support() test (Kelvin)
    Drop drm_intel_*() apis (Chris)

v9: by Chris:
    Drop all do_ioctl/do_ioctl_err()
    Use gem_context_[gs]et_param()
    Use gem_read() instead of mapping memory
    by Lionel:
    Test dynamic sseu on/off more

Tvrtko Ursulin:

v10:
 * Various style tweaks and refactorings.
 * New test coverage.

v11:
 * Change platform support to just Gen11.
 * Simplify availability test. (Chris Wilson)
 * More invalid pointer tests. (Chris Wilson)

v12:
 * Fix MAP_FIXED use (doh!).
 * Fix get/set copy&paste errors.
 * Drop supported platform test. (Chris Wilson)
 * Add mmap__gtt test. (Chris Wilson)

v13:
 * Commit message tweaks.
 * Added reset/hang/suspend tests. (Chris Wilson)
 * Assert spinner is busy. (Chris Wilson)
 * Remove some more ABI assumptions. (Chris Wilson)

v14:
 * Use default resume time. (Chris Wilson)
 * Trigger hang after rpcs read batch has been submitted. (Chris Wilson)

v15:
 * Adjust for uAPI restrictions.

v16:
 * Build system changes.

v17:
 * Remove all subtests which read the RPCS register. (Joonas Lahtinen)

v18:
 * Tidy curly braces. (Joonas Lahtinen)

v19:
 * Check flags/rsvd MBZ.

v20:
 * Rebase for engine_class/engine_instance uapi change.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> # v14
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 tests/Makefile.am          |   1 +
 tests/Makefile.sources     |   3 +
 tests/i915/gem_ctx_param.c |   4 +-
 tests/i915/gem_ctx_sseu.c  | 541 +++++++++++++++++++++++++++++++++++++
 tests/meson.build          |   8 +
 5 files changed, 556 insertions(+), 1 deletion(-)
 create mode 100644 tests/i915/gem_ctx_sseu.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 48d77535b6bd..42463bde7f30 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -111,6 +111,7 @@ gem_close_race_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_close_race_LDADD = $(LDADD) -lpthread
 gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_ctx_thrash_LDADD = $(LDADD) -lpthread
+gem_ctx_sseu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
 gem_exec_capture_LDADD = $(LDADD) -lz
 gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gem_exec_parallel_LDADD = $(LDADD) -lpthread
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index eedde1e817cb..3dfeb5b67274 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -161,6 +161,9 @@ gem_ctx_isolation_SOURCES = i915/gem_ctx_isolation.c
 TESTS_progs += gem_ctx_param
 gem_ctx_param_SOURCES = i915/gem_ctx_param.c
 
+TESTS_progs += gem_ctx_sseu
+gem_ctx_sseu_SOURCES = i915/gem_ctx_sseu.c
+
 TESTS_progs += gem_ctx_switch
 gem_ctx_switch_SOURCES = i915/gem_ctx_switch.c
 
diff --git a/tests/i915/gem_ctx_param.c b/tests/i915/gem_ctx_param.c
index 0bbc5effbf9f..acc1e6297750 100644
--- a/tests/i915/gem_ctx_param.c
+++ b/tests/i915/gem_ctx_param.c
@@ -294,11 +294,13 @@ igt_main
 			set_priority(fd);
 	}
 
+	/* I915_CONTEXT_PARAM_SSEU tests are located in gem_ctx_sseu.c */
+
 	/* NOTE: This testcase intentionally tests for the next free parameter
 	 * to catch ABI extensions. Don't "fix" this testcase without adding all
 	 * the tests for the new param first.
 	 */
-	arg.param = I915_CONTEXT_PARAM_PRIORITY + 1;
+	arg.param = I915_CONTEXT_PARAM_SSEU + 1;
 
 	igt_subtest("invalid-param-get") {
 		arg.ctx_id = ctx;
diff --git a/tests/i915/gem_ctx_sseu.c b/tests/i915/gem_ctx_sseu.c
new file mode 100644
index 000000000000..16609bee61c8
--- /dev/null
+++ b/tests/i915/gem_ctx_sseu.c
@@ -0,0 +1,541 @@
+/*
+ * Copyright © 2017-2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Lionel Landwerlin <lionel.g.landwerlin@intel.com>
+ *
+ */
+
+#include "igt.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "igt_dummyload.h"
+#include "igt_perf.h"
+#include "igt_sysfs.h"
+#include "ioctl_wrappers.h"
+
+IGT_TEST_DESCRIPTION("Test context render powergating programming.");
+
+static unsigned int __intel_gen__, __intel_devid__;
+static uint64_t __slice_mask__, __subslice_mask__;
+static unsigned int __slice_count__, __subslice_count__;
+
+static uint64_t mask_minus_one(uint64_t mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < (sizeof(mask) * 8 - 1); i++) {
+		if ((1ULL << i) & mask)
+			return mask & ~(1ULL << i);
+	}
+
+	igt_assert(0);
+	return 0;
+}
+
+static uint64_t mask_plus_one(uint64_t mask)
+{
+	unsigned int i;
+
+	for (i = 0; i < (sizeof(mask) * 8 - 1); i++) {
+		if (((1ULL << i) & mask) == 0)
+			return mask | (1ULL << i);
+	}
+
+	igt_assert(0);
+	return 0;
+}
+
+static uint64_t mask_minus(uint64_t mask, int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		mask = mask_minus_one(mask);
+
+	return mask;
+}
+
+static uint64_t mask_plus(uint64_t mask, int n)
+{
+	unsigned int i;
+
+	for (i = 0; i < n; i++)
+		mask = mask_plus_one(mask);
+
+	return mask;
+}
+
+static bool
+kernel_has_per_context_sseu_support(int fd)
+{
+	struct drm_i915_gem_context_param_sseu sseu = { };
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_SSEU,
+		.size = sizeof(sseu),
+		.value = to_user_pointer(&sseu),
+	};
+	int ret;
+
+	if (__gem_context_get_param(fd, &arg))
+		return false;
+
+	arg.value = to_user_pointer(&sseu);
+
+	ret = __gem_context_set_param(fd, &arg);
+
+	igt_assert(ret == 0 || ret == -ENODEV || ret == -EINVAL);
+
+	return ret == 0;
+}
+
+static bool has_engine(int fd, unsigned int class, unsigned int instance)
+{
+	int pmu = perf_i915_open(I915_PMU_ENGINE_BUSY(class, instance));
+
+	if (pmu >= 0)
+		close(pmu);
+
+	return pmu >= 0;
+}
+
+/*
+ * Verify that invalid engines are rejected and valid ones are accepted.
+ */
+static void test_engines(int fd)
+{
+	struct drm_i915_gem_context_param_sseu sseu = { };
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_SSEU,
+		.ctx_id = gem_context_create(fd),
+		.size = sizeof(sseu),
+		.value = to_user_pointer(&sseu)
+	};
+	unsigned int class, instance;
+	int last_with_engines;
+
+	/* get_param */
+
+	sseu.engine_instance = -1; /* Assumed invalid. */
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL);
+
+	sseu.engine_class = I915_ENGINE_CLASS_INVALID; /* Both invalid. */
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL);
+
+	sseu.engine_instance = 0; /* Class invalid. */
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL);
+	sseu.engine_class = I915_ENGINE_CLASS_RENDER;
+
+	last_with_engines = -1;
+	for (class = 0; class < ~0; class++) {
+		for (instance = 0; instance < ~0; instance++) {
+			int ret;
+
+			sseu.engine_class = class;
+			sseu.engine_instance = instance;
+
+			ret = __gem_context_get_param(fd, &arg);
+
+			if (has_engine(fd, class, instance)) {
+				igt_assert_eq(ret, 0);
+				last_with_engines = class;
+			} else {
+				igt_assert_eq(ret, -EINVAL);
+				if (instance > 8) /* Skip over some instance holes. */
+					break;
+			}
+		}
+
+		if (class - last_with_engines > 8) /* Skip over some class holes. */
+			break;
+	}
+
+	/*
+	 * Get some proper values before trying to reprogram them onto
+	 * an invalid engine.
+	 */
+	sseu.engine_class = 0;
+	sseu.engine_instance = 0;
+	gem_context_get_param(fd, &arg);
+
+	/* set_param */
+
+	sseu.engine_instance = -1; /* Assumed invalid. */
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	sseu.engine_class = I915_ENGINE_CLASS_INVALID; /* Both invalid. */
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	sseu.engine_instance = 0; /* Class invalid. */
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	last_with_engines = -1;
+	for (class = 0; class < ~0; class++) {
+		for (instance = 0; instance < ~0; instance++) {
+			int ret;
+
+			sseu.engine_class = class;
+			sseu.engine_instance = instance;
+
+			ret = __gem_context_set_param(fd, &arg);
+
+			if (has_engine(fd, class, instance)) {
+				igt_assert(ret == 0 || ret == -ENODEV);
+				last_with_engines = class;
+			} else {
+				igt_assert_eq(ret, -EINVAL);
+				if (instance > 8) /* Skip over some instance holes. */
+					break;
+			}
+		}
+
+		if (class - last_with_engines > 8) /* Skip over some class holes. */
+			break;
+	}
+
+	gem_context_destroy(fd, arg.ctx_id);
+}
+
+/*
+ * Verify that invalid arguments are rejected.
+ */
+static void
+test_invalid_args(int fd)
+{
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_SSEU,
+		.ctx_id = gem_context_create(fd),
+	};
+	struct drm_i915_gem_context_param_sseu sseu = { };
+	unsigned char *page[2];
+	unsigned char *addr;
+	unsigned int sz;
+
+	/* get param */
+
+	/* Invalid size. */
+	arg.size = 1;
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EINVAL);
+
+	/* Query size. */
+	arg.size = 0;
+	igt_assert_eq(__gem_context_get_param(fd, &arg), 0);
+	sz = arg.size;
+
+	/* Bad pointers. */
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT);
+	arg.value = -1;
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT);
+	arg.value = 1;
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT);
+
+	/* Unmapped. */
+	page[0] = mmap(0, 4096, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	memset(page[0], 0, sizeof(sseu));
+	munmap(page[0], 4096);
+	arg.value = to_user_pointer(page[0]);
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT);
+
+	/* Straddle into unmapped area. */
+	page[0] = mmap(0, 8192, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	munmap(page[0], 8192);
+	page[0] = mmap(page[0], 4096,
+		       PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	memset(page[0], 0, sizeof(sseu));
+	page[1] = mmap((void *)((unsigned long)page[0] + 4096), 4096,
+		       PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+	igt_assert(page[1] != MAP_FAILED);
+	memset(page[1], 0, sizeof(sseu));
+	munmap(page[1], 4096);
+	arg.value = to_user_pointer(page[1]) -
+		    sizeof(struct drm_i915_gem_context_param_sseu) + 4;
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT);
+	munmap(page[0], 4096);
+
+	/* Straddle into read-only area. */
+	page[0] = mmap(0, 8192, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	munmap(page[0], 8192);
+	page[0] = mmap(page[0], 4096,
+		       PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	memset(page[0], 0, sizeof(sseu));
+	page[1] = mmap((void *)((unsigned long)page[0] + 4096), 4096,
+		       PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+	igt_assert(page[1] != MAP_FAILED);
+	memset(page[1], 0, sizeof(sseu));
+	igt_assert(mprotect(page[1], 4096, PROT_READ) == 0);
+	arg.value = to_user_pointer(page[1] - sizeof(sseu) + 4);
+	igt_assert_eq(__gem_context_get_param(fd, &arg), -EFAULT);
+	munmap(page[0], 4096);
+	munmap(page[1], 4096);
+
+	/* set param */
+
+	/* Invalid sizes. */
+	arg.size = 1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	arg.size = 0;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	arg.size = sz;
+
+	/* Bad pointers. */
+	arg.value = 0;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT);
+	arg.value = -1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT);
+	arg.value = 1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT);
+
+	/* Get valid SSEU. */
+	arg.value = to_user_pointer(&sseu);
+	igt_assert_eq(__gem_context_get_param(fd, &arg), 0);
+
+	/* Invalid MBZ. */
+	sseu.flags = -1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	sseu.rsvd = -1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	sseu.flags = 0;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	sseu.rsvd = 0;
+
+	/* Unmapped. */
+	page[0] = mmap(0, 4096, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	memcpy(page[0], &sseu, sizeof(sseu));
+	munmap(page[0], 4096);
+	arg.value = to_user_pointer(page[0]);
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT);
+
+	/* Straddle into unmapped area. */
+	page[0] = mmap(0, 8192, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	munmap(page[0], 8192);
+	page[0] = mmap(page[0], 4096,
+		       PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+	igt_assert(page[0] != MAP_FAILED);
+	page[1] = mmap((void *)((unsigned long)page[0] + 4096), 4096,
+		       PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0);
+	igt_assert(page[1] != MAP_FAILED);
+	addr = page[1] - sizeof(sseu) + 4;
+	memcpy(addr, &sseu, sizeof(sseu));
+	munmap(page[1], 4096);
+	arg.value = to_user_pointer(addr);
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EFAULT);
+	munmap(page[0], 4096);
+
+	gem_context_destroy(fd, arg.ctx_id);
+}
+
+/*
+ * Verify that ggtt mapped area can be used as the sseu pointer.
+ */
+static void
+test_ggtt_args(int fd)
+{
+	struct drm_i915_gem_context_param_sseu *sseu;
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_SSEU,
+		.ctx_id = gem_context_create(fd),
+		.size = sizeof(*sseu),
+	};
+	uint32_t bo;
+
+	bo = gem_create(fd, 4096);
+	arg.value = to_user_pointer(gem_mmap__gtt(fd, bo, 4096,
+						  PROT_READ | PROT_WRITE));
+
+	igt_assert_eq(__gem_context_get_param(fd, &arg), 0);
+	igt_assert_eq(__gem_context_set_param(fd, &arg), 0);
+
+	munmap((void *)arg.value, 4096);
+	gem_close(fd, bo);
+	gem_context_destroy(fd, arg.ctx_id);
+}
+
+/*
+ * Verify that invalid SSEU values are rejected.
+ */
+static void
+test_invalid_sseu(int fd)
+{
+	struct drm_i915_gem_context_param_sseu device_sseu = { };
+	struct drm_i915_gem_context_param_sseu sseu = { };
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_SSEU,
+		.ctx_id = gem_context_create(fd),
+		.size = sizeof(sseu),
+	};
+	unsigned int i;
+
+	/* Fetch the device defaults. */
+	arg.value = to_user_pointer(&device_sseu);
+	gem_context_get_param(fd, &arg);
+
+	arg.value = to_user_pointer(&sseu);
+
+	/* Try all slice masks known to be invalid. */
+	sseu = device_sseu;
+	for (i = 1; i <= (8 - __slice_count__); i++) {
+		sseu.slice_mask = mask_plus(__slice_mask__, i);
+		igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	}
+
+	/* 0 slices. */
+	sseu.slice_mask = 0;
+	igt_assert_eq(-EINVAL, __gem_context_set_param(fd, &arg));
+
+	/* Try all subslice masks known to be invalid. */
+	sseu = device_sseu;
+	for (i = 1; i <= (8 - __subslice_count__); i++) {
+		sseu.subslice_mask = mask_plus(__subslice_mask__, i);
+		igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	}
+
+	/* 0 subslices. */
+	sseu.subslice_mask = 0;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	/* Try number of EUs superior to the max available. */
+	sseu = device_sseu;
+	sseu.min_eus_per_subslice = device_sseu.max_eus_per_subslice + 1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	sseu = device_sseu;
+	sseu.max_eus_per_subslice = device_sseu.max_eus_per_subslice + 1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	/* Try to program 0 max EUs. */
+	sseu = device_sseu;
+	sseu.max_eus_per_subslice = 0;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	/* Min > max */
+	sseu = device_sseu;
+	sseu.min_eus_per_subslice = sseu.max_eus_per_subslice;
+	sseu.max_eus_per_subslice = 1;
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	if (__intel_gen__ != 11)
+		goto out;
+
+	/* Subset of subslices but slice mask greater than one. */
+	if (__slice_count__ > 1) {
+		sseu = device_sseu;
+		sseu.subslice_mask = mask_minus_one(sseu.subslice_mask);
+		igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	}
+
+	/* Odd subslices above four. */
+	sseu = device_sseu;
+	sseu.slice_mask = 0x1;
+	sseu.subslice_mask = mask_minus_one(sseu.subslice_mask);
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	/* More than half subslices with one slice. */
+	sseu = device_sseu;
+	sseu.slice_mask = 0x1;
+	sseu.subslice_mask = mask_minus(sseu.subslice_mask,
+					__subslice_count__ / 2 - 1);
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+	/* VME */
+
+	/* Slice count between one and max. */
+	if (__slice_count__ > 2) {
+		sseu = device_sseu;
+		sseu.slice_mask = mask_minus_one(sseu.slice_mask);
+		igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+	}
+
+	/* Less than half subslices with one slice. */
+	sseu = device_sseu;
+	sseu.slice_mask = 0x1;
+	sseu.subslice_mask = mask_minus(sseu.subslice_mask,
+					__subslice_count__ / 2 + 1);
+	igt_assert_eq(__gem_context_set_param(fd, &arg), -EINVAL);
+
+out:
+	gem_context_destroy(fd, arg.ctx_id);
+}
+
+igt_main
+{
+	int fd;
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(fd);
+
+		__intel_devid__ = intel_get_drm_devid(fd);
+		__intel_gen__ = intel_gen(__intel_devid__);
+
+		igt_require(kernel_has_per_context_sseu_support(fd));
+	}
+
+	igt_subtest_group {
+		igt_fixture {
+			drm_i915_getparam_t gp;
+
+			gp.param = I915_PARAM_SLICE_MASK;
+			gp.value = (int *) &__slice_mask__;
+			do_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+			__slice_count__ = __builtin_popcount(__slice_mask__);
+
+			gp.param = I915_PARAM_SUBSLICE_MASK;
+			gp.value = (int *) &__subslice_mask__;
+			do_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+			__subslice_count__ =
+				__builtin_popcount(__subslice_mask__);
+		}
+
+		igt_subtest("invalid-args")
+			test_invalid_args(fd);
+
+		igt_subtest("invalid-sseu")
+			test_invalid_sseu(fd);
+
+		igt_subtest("ggtt-args")
+			test_ggtt_args(fd);
+
+		igt_subtest("engines")
+			test_engines(fd);
+	}
+
+	igt_fixture {
+		close(fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index b8a6e61b3404..f41f724af8af 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -288,6 +288,14 @@ foreach prog : gem_progs + gen3_progs
 	test_list += prog
 endforeach
 
+test_executables += executable('gem_ctx_sseu',
+	   join_paths('i915', 'gem_ctx_sseu.c'),
+	   dependencies : test_deps + [ lib_igt_perf ],
+	   install_dir : libexecdir,
+	   install_rpath : libexecdir_rpathdir,
+	   install : true)
+test_progs += 'gem_ctx_sseu'
+
 test_executables += executable('gem_eio',
 	   join_paths('i915', 'gem_eio.c'),
 	   dependencies : test_deps + [ realtime ],
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 3/4] tests/gem_media_vme: Simple test to exercise the VME block
  2019-01-14 10:29 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 2/4] tests/gem_ctx_sseu: Dynamic (sub)slice programming tests Tvrtko Ursulin
@ 2019-01-14 10:29 ` Tvrtko Ursulin
  2019-01-14 10:29 ` [PATCH i-g-t 4/4] tests/gem_media_vme: Shut down half of subslices to avoid gpu hang on ICL Tvrtko Ursulin
  3 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14 10:29 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tony Ye <tony.ye@intel.com>

Simple test which exercises the VME fixed function block.

v2: (Tvrtko Ursulin)
 * Small cleanups like copyright date, tabs, remove unused bits.

v3: (Tony Ye)
 * Added curbe data entry for dst surface.
 * Read the dst surface after the VME kernel being executed.

v4: (Tony Ye)
 * Added the media_vme.gxa kernel source code and compile instructions.

v5: (Tvrtko Ursulin)
 * Added hang detector.

v6: (Tvrtko Ursulin)
 * Replace gem_read with gem_sync. (Chris Wilson)

Signed-off-by: Tony Ye <tony.ye@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Tony Ye <tony.ye@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 lib/gpu_cmds.c                              | 136 ++++++++++++++++++++
 lib/gpu_cmds.h                              |  20 ++-
 lib/i915/shaders/media/README_media_vme.txt |  65 ++++++++++
 lib/i915/shaders/media/media_vme.gxa        |  51 ++++++++
 lib/intel_batchbuffer.c                     |   9 ++
 lib/intel_batchbuffer.h                     |   7 +
 lib/media_fill.c                            | 110 ++++++++++++++++
 lib/media_fill.h                            |   6 +
 lib/surfaceformat.h                         |   2 +
 tests/Makefile.sources                      |   3 +
 tests/i915/gem_media_vme.c                  | 117 +++++++++++++++++
 tests/meson.build                           |   1 +
 12 files changed, 525 insertions(+), 2 deletions(-)
 create mode 100755 lib/i915/shaders/media/README_media_vme.txt
 create mode 100755 lib/i915/shaders/media/media_vme.gxa
 create mode 100644 tests/i915/gem_media_vme.c

diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
index 556a94c6f0b6..b490a63bdfef 100644
--- a/lib/gpu_cmds.c
+++ b/lib/gpu_cmds.c
@@ -52,6 +52,22 @@ gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
 	return offset;
 }
 
+uint32_t
+gen11_fill_curbe_buffer_data(struct intel_batchbuffer *batch)
+{
+	uint32_t *curbe_buffer;
+	uint32_t offset;
+
+	curbe_buffer = intel_batchbuffer_subdata_alloc(batch,
+						       sizeof(uint32_t) * 8,
+						       64);
+	offset = intel_batchbuffer_subdata_offset(batch, curbe_buffer);
+	*curbe_buffer++ = 0;
+	*curbe_buffer   = 1;
+
+	return offset;
+}
+
 uint32_t
 gen7_fill_surface_state(struct intel_batchbuffer *batch,
 			const struct igt_buf *buf,
@@ -119,6 +135,26 @@ gen7_fill_binding_table(struct intel_batchbuffer *batch,
 	return offset;
 }
 
+uint32_t
+gen11_fill_binding_table(struct intel_batchbuffer *batch,
+			const struct igt_buf *src,const struct igt_buf *dst)
+{
+	uint32_t *binding_table, offset;
+
+	binding_table = intel_batchbuffer_subdata_alloc(batch, 64, 64);
+	offset = intel_batchbuffer_subdata_offset(batch, binding_table);
+	binding_table[0] = gen11_fill_surface_state(batch, src,
+						SURFACE_1D,SURFACEFORMAT_R32G32B32A32_FLOAT,
+						0,0,
+						0);
+	binding_table[1] = gen11_fill_surface_state(batch, dst,
+						SURFACE_BUFFER, SURFACEFORMAT_RAW,
+						1,1,
+						1);
+
+	return offset;
+}
+
 uint32_t
 gen7_fill_kernel(struct intel_batchbuffer *batch,
 		const uint32_t kernel[][4],
@@ -384,6 +420,71 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
 	return offset;
 }
 
+uint32_t
+gen11_fill_surface_state(struct intel_batchbuffer *batch,
+			const struct igt_buf *buf,
+			uint32_t surface_type,
+			uint32_t format,
+			uint32_t vertical_alignment,
+			uint32_t horizontal_alignment,
+			int is_dst)
+{
+	struct gen8_surface_state *ss;
+	uint32_t write_domain, read_domain, offset;
+	int ret;
+
+	if (is_dst) {
+		write_domain = read_domain = I915_GEM_DOMAIN_RENDER;
+	} else {
+		write_domain = 0;
+		read_domain = I915_GEM_DOMAIN_SAMPLER;
+	}
+
+	ss = intel_batchbuffer_subdata_alloc(batch, sizeof(*ss), 64);
+	offset = intel_batchbuffer_subdata_offset(batch, ss);
+
+	ss->ss0.surface_type = surface_type;
+	ss->ss0.surface_format = format;
+	ss->ss0.render_cache_read_write = 1;
+	ss->ss0.vertical_alignment = vertical_alignment; /* align 4 */
+	ss->ss0.horizontal_alignment = horizontal_alignment; /* align 4 */
+
+	if (buf->tiling == I915_TILING_X)
+		ss->ss0.tiled_mode = 2;
+	else if (buf->tiling == I915_TILING_Y)
+		ss->ss0.tiled_mode = 3;
+	else
+		ss->ss0.tiled_mode = 0;
+
+	ss->ss8.base_addr = buf->bo->offset;
+
+	ret = drm_intel_bo_emit_reloc(batch->bo,
+				intel_batchbuffer_subdata_offset(batch, ss) + 8 * 4,
+				buf->bo, 0, read_domain, write_domain);
+	igt_assert(ret == 0);
+
+	if (is_dst) {
+		ss->ss1.memory_object_control = 2;
+		ss->ss2.height = 1;
+		ss->ss2.width  = 95;
+		ss->ss3.pitch  = 0;
+		ss->ss7.shader_chanel_select_r = 4;
+		ss->ss7.shader_chanel_select_g = 5;
+		ss->ss7.shader_chanel_select_b = 6;
+		ss->ss7.shader_chanel_select_a = 7;
+	}
+	else {
+		ss->ss1.qpitch = 4040;
+		ss->ss1.base_mip_level = 31;
+		ss->ss2.height = 9216;
+		ss->ss2.width  = 1019;
+		ss->ss3.pitch  = 64;
+		ss->ss5.mip_count = 2;
+	}
+
+	return offset;
+}
+
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
 			       const struct igt_buf *dst,
@@ -419,6 +520,41 @@ gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
 	return offset;
 }
 
+uint32_t
+gen11_fill_interface_descriptor(struct intel_batchbuffer *batch,
+			       const struct igt_buf *src,const struct igt_buf *dst,
+			       const uint32_t kernel[][4],
+			       size_t size)
+{
+	struct gen8_interface_descriptor_data *idd;
+	uint32_t offset;
+	uint32_t binding_table_offset, kernel_offset;
+
+	binding_table_offset = gen11_fill_binding_table(batch, src,dst);
+	kernel_offset = gen7_fill_kernel(batch, kernel, size);
+
+	idd = intel_batchbuffer_subdata_alloc(batch, sizeof(*idd), 64);
+	offset = intel_batchbuffer_subdata_offset(batch, idd);
+
+	idd->desc0.kernel_start_pointer = (kernel_offset >> 6);
+
+	idd->desc2.single_program_flow = 1;
+	idd->desc2.floating_point_mode = GEN8_FLOATING_POINT_IEEE_754;
+
+	idd->desc3.sampler_count = 0;      /* 0 samplers used */
+	idd->desc3.sampler_state_pointer = 0;
+
+	idd->desc4.binding_table_entry_count = 0;
+	idd->desc4.binding_table_pointer = (binding_table_offset >> 5);
+
+	idd->desc5.constant_urb_entry_read_offset = 0;
+	idd->desc5.constant_urb_entry_read_length = 1; /* grf 1 */
+
+	idd->desc6.num_threads_in_tg = 1;
+
+	return offset;
+}
+
 void
 gen8_emit_state_base_address(struct intel_batchbuffer *batch)
 {
diff --git a/lib/gpu_cmds.h b/lib/gpu_cmds.h
index 79bc4d6473ba..ca671fb52daf 100644
--- a/lib/gpu_cmds.h
+++ b/lib/gpu_cmds.h
@@ -43,6 +43,8 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
 uint32_t
 gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
 			uint8_t color);
+uint32_t
+gen11_fill_curbe_buffer_data(struct intel_batchbuffer *batch);
 
 uint32_t
 gen7_fill_surface_state(struct intel_batchbuffer *batch,
@@ -53,6 +55,9 @@ gen7_fill_surface_state(struct intel_batchbuffer *batch,
 uint32_t
 gen7_fill_binding_table(struct intel_batchbuffer *batch,
 			const struct igt_buf *dst);
+uint32_t
+gen11_fill_binding_table(struct intel_batchbuffer *batch,
+			const struct igt_buf *src,const struct igt_buf *dst);
 
 uint32_t
 gen7_fill_kernel(struct intel_batchbuffer *batch,
@@ -99,13 +104,24 @@ gen8_fill_surface_state(struct intel_batchbuffer *batch,
 			const struct igt_buf *buf,
 			uint32_t format,
 			int is_dst);
-
+uint32_t
+gen11_fill_surface_state(struct intel_batchbuffer *batch,
+			const struct igt_buf *buf,
+			uint32_t surface_type,
+			uint32_t format,
+			uint32_t vertical_alignment,
+			uint32_t horizontal_alignment,
+			int is_dst);
 uint32_t
 gen8_fill_interface_descriptor(struct intel_batchbuffer *batch,
 			       const struct igt_buf *dst,
 			       const uint32_t kernel[][4],
 			       size_t size);
-
+uint32_t
+gen11_fill_interface_descriptor(struct intel_batchbuffer *batch,
+			       const struct igt_buf *src,const struct igt_buf *dst,
+			       const uint32_t kernel[][4],
+			       size_t size);
 void
 gen8_emit_state_base_address(struct intel_batchbuffer *batch);
 
diff --git a/lib/i915/shaders/media/README_media_vme.txt b/lib/i915/shaders/media/README_media_vme.txt
new file mode 100755
index 000000000000..2470fdd89825
--- /dev/null
+++ b/lib/i915/shaders/media/README_media_vme.txt
@@ -0,0 +1,65 @@
+Step1: Building IGA (Intel Graphics Assembler)
+========================================================================
+
+1. Download or clone IGC (Intel Graphics Compiler)
+
+   https://github.com/intel/intel-graphics-compiler.git
+
+2. Chdir into 'intel-graphics-compiler' (or any other workspace folder of choice)
+
+   It should read the following folder strucutre:
+
+   workspace
+      |- visa
+      |- IGC
+      |- inc
+      |- 3d
+      |- skuwa
+
+3. Chdir into IGA sub-component
+
+   cd visa/iga
+
+4. Create build directory
+
+    mkdir build
+
+5. Change into build directory
+
+    cd build
+
+6. Run cmake
+
+   cmake ../
+
+7. Run make to build IGA project
+
+   make
+
+8. Get the output executable "iga64" in IGAExe folder
+
+   usage: ./iga64 OPTIONS ARGS
+   where OPTIONS:
+     -h     --help                     shows help on an option
+     -d     --disassemble              disassembles the input file
+     -a     --assemble                 assembles the input file
+     -n     --numeric-labels           use numeric labels
+     -p     --platform        DEVICE   specifies the platform (e.g. "GEN9")
+     -o     --output          FILE     specifies the output file
+
+   EXAMPLES:
+   ./iga64  file.gxa  -p=11 -a  -o file.krn
+
+Step2: Building ASM code
+========================================================================
+1. Command line to convert asm code to binary:
+
+   iga64 media_vme.gxa -p=11 -a -o media_vme.krn
+
+2. Pad 128 bytes zeros to the kernel:
+
+   dd if=/dev/zero bs=1 count=128 >> media_vme.krn
+
+3. Generate hexdump:
+
+   hexdump -v  -e '4/4 "0x%08x " "\n"' media_vme.krn > media_vme.hex
diff --git a/lib/i915/shaders/media/media_vme.gxa b/lib/i915/shaders/media/media_vme.gxa
new file mode 100755
index 000000000000..93a0ed2f12d6
--- /dev/null
+++ b/lib/i915/shaders/media/media_vme.gxa
@@ -0,0 +1,51 @@
+/*
+* Copyright (c) 2018, Intel Corporation
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included
+* in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+       mov (8|M0)       r1.8<1>:w       0x20000000:v
+       mov (8|M0)       r20.0<1>:w      0x1:v
+(W)    mov (1|M0)       r1.2<1>:f       0x3818000C:f
+       mov (8|M0)       r20.8<1>:w      0x10:v
+       mov (8|M0)       r21.0<1>:w      0x10000:v
+(W)    mov (1|M0)       r1.3<1>:f       0x22222222:f
+       add (1|M0)       a0.0<1>:d       r1.0<0;1,0>:d     276307968:d
+       mov (8|M0)       r2.0<1>:f       0x0:f
+       mov (8|M0)       r5.0<1>:f       0x0:f
+       mov (8|M0)       r6.0<1>:f       0x0:f
+       mov (8|M0)       r9.0<1>:f       0x0:f
+       mov (8|M0)       r3.0<1>:d       r1.8<8;8,1>:w
+       mul (8|M0)       r4.0<1>:d       r1.2<0;1,0>:d     r20.0<8;8,1>:w
+       mul (8|M0)       r7.0<1>:d       r20.8<8;8,1>:w    256:w
+       mul (8|M0)       r8.0<1>:d       r1.3<0;1,0>:d     r21.0<8;8,1>:w
+(W)    mov (1|M0)       r17.2<1>:f      0x0:f
+       send (16|M0)     r10:uw          r2:f              0xD     a0.0{NoPreempt}
+       mov (1|M0)       r10.14<1>:hf    0xBEEF:hf
+(W)    add (1|M0)       a0.0<1>:ud      r1.1<0;1,0>:ud    0x20A0400:ud
+       mov (1|M0)       r10.15<1>:hf    0xDEAD:hf
+(W)    mov (1|M0)       r18.2<1>:f      0x8:f
+(W)    mov (1|M0)       r19.2<1>:f      0xC:f
+(W)    mov (8|M0)       r127.0<1>:f     r0.0<8;8,1>:f
+(W)    sends (16|M0)    null:uw         r17               r10     0x10A    a0.0
+(W)    add (1|M0)       a0.0<1>:ud      r1.1<0;1,0>:ud    0x20A0300:ud
+(W)    sends (16|M0)    null:uw         r18               r14     0x8A     a0.0
+(W)    add (1|M0)       a0.0<1>:ud      r1.1<0;1,0>:ud    0x20A0200:ud
+(W)    sends (8|M0)     null:ud         r19               r16     0x4A     a0.0
+(W)    send (8|M0)      null            r127:f            0x27    0x2000010 {EOT}
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index ad2e718f898c..22697c94fb13 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -873,6 +873,15 @@ igt_fillfunc_t igt_get_media_fillfunc(int devid)
 	return fill;
 }
 
+igt_vme_func_t igt_get_media_vme_func(int devid)
+{
+	igt_vme_func_t fill = NULL;
+
+	if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid))
+		fill = gen11_media_vme_func;
+
+	return fill;
+}
 /**
  * igt_get_gpgpu_fillfunc:
  * @devid: pci device id
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index ecc23f08da77..e5f6e6d045b8 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -325,6 +325,13 @@ typedef void (*igt_fillfunc_t)(struct intel_batchbuffer *batch,
 igt_fillfunc_t igt_get_media_fillfunc(int devid);
 igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid);
 
+typedef void (*igt_vme_func_t)(struct intel_batchbuffer *batch,
+			       const struct igt_buf *src,
+			       unsigned int width, unsigned int height,
+			       const struct igt_buf *dst);
+
+igt_vme_func_t igt_get_media_vme_func(int devid);
+
 /**
  * igt_media_spinfunc_t:
  * @batch: batchbuffer object
diff --git a/lib/media_fill.c b/lib/media_fill.c
index 4942229505ff..b1e84727394a 100644
--- a/lib/media_fill.c
+++ b/lib/media_fill.c
@@ -61,6 +61,46 @@ static const uint32_t gen8_media_kernel[][4] = {
 	{ 0x07800031, 0x20000a40, 0x0e000e00, 0x82000010 },
 };
 
+static const uint32_t gen11_media_vme_kernel[][4] = {
+	{ 0x00600001, 0x20302e68,  0x00000000,  0x20000000 },
+	{ 0x00600001, 0x22802e68,  0x00000000,  0x00000001 },
+	{ 0x00000001, 0x20284f2c,  0x00000000,  0x3818000c },
+	{ 0x00600001, 0x22902e68,  0x00000000,  0x00000010 },
+	{ 0x00600001, 0x22a02e68,  0x00000000,  0x00010000 },
+	{ 0x00000001, 0x202c4f2c,  0x00000000,  0x22222222 },
+	{ 0x00000040, 0x22000a20,  0x0e000020,  0x10782000 },
+	{ 0x00600001, 0x20404f28,  0x00000000,  0x00000000 },
+	{ 0x00600001, 0x20a04f28,  0x00000000,  0x00000000 },
+	{ 0x00600001, 0x20c04f28,  0x00000000,  0x00000000 },
+	{ 0x00600001, 0x21204f28,  0x00000000,  0x00000000 },
+	{ 0x00600001, 0x20601a28,  0x008d0030,  0x00000000 },
+	{ 0x00600041, 0x20800a28,  0x1a000028,  0x008d0280 },
+	{ 0x00600041, 0x20e01a28,  0x1e8d0290,  0x01000100 },
+	{ 0x00600041, 0x21000a28,  0x1a00002c,  0x008d02a0 },
+	{ 0x00000001, 0x22284f2c,  0x00000000,  0x00000000 },
+	{ 0x0d80c031, 0x21404a48,  0x00000040,  0x00000200 },
+	{ 0x00000001, 0x215c4708,  0x00000000,  0xbeefbeef },
+	{ 0x00000040, 0x22000204,  0x06000024,  0x020a0400 },
+	{ 0x00000001, 0x215e4708,  0x00000000,  0xdeaddead },
+	{ 0x00000001, 0x22484f2c,  0x00000000,  0x00000008 },
+	{ 0x00000001, 0x22684f2c,  0x00000000,  0x0000000c },
+	{ 0x00600001, 0x2fe04b2c,  0x008d0000,  0x00000000 },
+	{ 0x0a800033, 0x0000a054,  0x00002224,  0x00000000 },
+	{ 0x00000040, 0x22000204,  0x06000024,  0x020a0300 },
+	{ 0x0a800033, 0x0000e054,  0x00002242,  0x00000000 },
+	{ 0x00000040, 0x22000204,  0x06000024,  0x020a0200 },
+	{ 0x0a600033, 0x00010014,  0x00002261,  0x00000000 },
+	{ 0x07600031, 0x20004a04,  0x06000fe0,  0x82000010 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+	{ 0x00000000, 0x00000000,  0x00000000,  0x00000000 },
+};
+
 /*
  * This sets up the media pipeline,
  *
@@ -245,3 +285,73 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 			      gen8_media_kernel, sizeof(gen8_media_kernel));
 
 }
+
+static void
+__gen11_media_vme_func(struct intel_batchbuffer *batch,
+		       const struct igt_buf *src,
+		       unsigned int width, unsigned int height,
+		       const struct igt_buf *dst,
+		       const uint32_t kernel[][4],
+		       size_t kernel_size)
+{
+	uint32_t curbe_buffer, interface_descriptor;
+	uint32_t batch_end;
+
+	intel_batchbuffer_flush(batch);
+
+	/* setup states */
+	batch->ptr = &batch->buffer[BATCH_STATE_SPLIT];
+
+	curbe_buffer = gen11_fill_curbe_buffer_data(batch);
+	interface_descriptor = gen11_fill_interface_descriptor(batch, src, dst,
+					kernel, kernel_size);
+	assert(batch->ptr < &batch->buffer[4095]);
+
+	/* media pipeline */
+	batch->ptr = batch->buffer;
+	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
+		  GEN9_FORCE_MEDIA_AWAKE_ENABLE |
+		  GEN9_SAMPLER_DOP_GATE_DISABLE |
+		  GEN9_PIPELINE_SELECTION_MASK |
+		  GEN9_SAMPLER_DOP_GATE_MASK |
+		  GEN9_FORCE_MEDIA_AWAKE_MASK);
+	gen9_emit_state_base_address(batch);
+
+	gen8_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+			    MEDIA_CURBE_SIZE);
+
+	gen7_emit_curbe_load(batch, curbe_buffer);
+
+	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
+
+	gen7_emit_media_objects(batch, 0, 0, width, height);
+
+	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
+		  GEN9_FORCE_MEDIA_AWAKE_DISABLE |
+		  GEN9_SAMPLER_DOP_GATE_ENABLE |
+		  GEN9_PIPELINE_SELECTION_MASK |
+		  GEN9_SAMPLER_DOP_GATE_MASK |
+		  GEN9_FORCE_MEDIA_AWAKE_MASK);
+
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+
+	batch_end = intel_batchbuffer_align(batch, 8);
+	assert(batch_end < BATCH_STATE_SPLIT);
+
+	gen7_render_flush(batch, batch_end);
+	intel_batchbuffer_reset(batch);
+}
+
+void
+gen11_media_vme_func(struct intel_batchbuffer *batch,
+		     const struct igt_buf *src,
+		     unsigned int width, unsigned int height,
+		     const struct igt_buf *dst)
+{
+	__gen11_media_vme_func(batch,
+			       src,
+			       width, height,
+			       dst,
+			       gen11_media_vme_kernel,
+			       sizeof(gen11_media_vme_kernel));
+}
diff --git a/lib/media_fill.h b/lib/media_fill.h
index e365da9e47d2..1d5c5fa826a3 100644
--- a/lib/media_fill.h
+++ b/lib/media_fill.h
@@ -49,4 +49,10 @@ gen9_media_fillfunc(struct intel_batchbuffer *batch,
 		    unsigned int width, unsigned int height,
 		    uint8_t color);
 
+void
+gen11_media_vme_func(struct intel_batchbuffer *batch,
+		     const struct igt_buf *src,
+		     unsigned int width, unsigned int height,
+		     const struct igt_buf *dst);
+
 #endif /* RENDE_MEDIA_FILL_H */
diff --git a/lib/surfaceformat.h b/lib/surfaceformat.h
index 5d7ed2cadc2d..32ea373262ca 100644
--- a/lib/surfaceformat.h
+++ b/lib/surfaceformat.h
@@ -171,6 +171,8 @@
 #define SURFACEFORMAT_R16G16B16_SSCALED	0x19E
 #define SURFACEFORMAT_R16G16B16_USCALED	0x19F
 
+#define SURFACEFORMAT_RAW	0x1FF
+
 #define SURFACERETURNFORMAT_FLOAT32	0
 #define SURFACERETURNFORMAT_S1	1
 
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 3dfeb5b67274..6dabb9dbf247 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -302,6 +302,9 @@ gem_madvise_SOURCES = i915/gem_madvise.c
 TESTS_progs += gem_media_fill
 gem_media_fill_SOURCES = i915/gem_media_fill.c
 
+TESTS_progs += gem_media_vme
+gem_media_vme_SOURCES = i915/gem_media_vme.c
+
 TESTS_progs += gem_mmap
 gem_mmap_SOURCES = i915/gem_mmap.c
 
diff --git a/tests/i915/gem_media_vme.c b/tests/i915/gem_media_vme.c
new file mode 100644
index 000000000000..8d9fd822b2ee
--- /dev/null
+++ b/tests/i915/gem_media_vme.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("A very simple workload for the VME media block.");
+
+#define WIDTH	64
+#define STRIDE	(WIDTH)
+#define HEIGHT	64
+
+#define INPUT_SIZE	(WIDTH * HEIGHT * sizeof(char) * 1.5)
+#define OUTPUT_SIZE	(56*sizeof(int))
+
+static void
+scratch_buf_init(drm_intel_bufmgr *bufmgr,
+		 struct igt_buf *buf,
+		 unsigned int size)
+{
+	drm_intel_bo *bo;
+
+	bo = drm_intel_bo_alloc(bufmgr, "", size, 4096);
+	igt_assert(bo);
+
+	memset(buf, 0, sizeof(*buf));
+
+	buf->bo = bo;
+	buf->tiling = I915_TILING_NONE;
+	buf->size = size;
+}
+
+static void scratch_buf_init_src(drm_intel_bufmgr *bufmgr, struct igt_buf *buf)
+{
+	scratch_buf_init(bufmgr, buf, INPUT_SIZE);
+
+	/*
+	 * Ideally we would read src surface from file "SourceFrameI.yu12".
+	 * But even without it, we can still triger the rcs0 resetting
+	 * with this vme kernel.
+	 */
+
+	buf->stride = STRIDE;
+}
+
+static void scratch_buf_init_dst(drm_intel_bufmgr *bufmgr, struct igt_buf *buf)
+{
+	scratch_buf_init(bufmgr, buf, OUTPUT_SIZE);
+
+	buf->stride = 1;
+}
+
+igt_simple_main
+{
+	int drm_fd;
+	uint32_t devid;
+	drm_intel_bufmgr *bufmgr;
+	igt_vme_func_t media_vme;
+	struct intel_batchbuffer *batch;
+	struct igt_buf src, dst;
+
+	drm_fd = drm_open_driver(DRIVER_INTEL);
+	igt_require_gem(drm_fd);
+
+	devid = intel_get_drm_devid(drm_fd);
+
+	media_vme = igt_get_media_vme_func(devid);
+	igt_require_f(media_vme, "no media-vme function\n");
+
+	bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
+	igt_assert(bufmgr);
+
+	batch = intel_batchbuffer_alloc(bufmgr, devid);
+	igt_assert(batch);
+
+	scratch_buf_init_src(bufmgr, &src);
+	scratch_buf_init_dst(bufmgr, &dst);
+
+	igt_fork_hang_detector(drm_fd);
+
+	media_vme(batch, &src, WIDTH, HEIGHT, &dst);
+
+	gem_sync(drm_fd, dst.bo->handle);
+
+	igt_stop_hang_detector();
+}
diff --git a/tests/meson.build b/tests/meson.build
index f41f724af8af..e3a4a84ea7cc 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -166,6 +166,7 @@ gem_progs = [
 	'gem_lut_handle',
 	'gem_madvise',
 	'gem_media_fill',
+	'gem_media_vme',
 	'gem_mmap',
 	'gem_mmap_gtt',
 	'gem_mmap_offset_exhaustion',
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 4/4] tests/gem_media_vme: Shut down half of subslices to avoid gpu hang on ICL
  2019-01-14 10:29 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
                   ` (2 preceding siblings ...)
  2019-01-14 10:29 ` [PATCH i-g-t 3/4] tests/gem_media_vme: Simple test to exercise the VME block Tvrtko Ursulin
@ 2019-01-14 10:29 ` Tvrtko Ursulin
  3 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-14 10:29 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tony Ye <tony.ye@intel.com>

On Icelake we need to turn off subslices not containing the VME block or
the VME kernel will hang.

v2: (Tvrtko Ursulin)
 * Remove libdrm usage for setting context param.
 * Cleanup bitmask operation.
 * Only apply the workaround for ICL.

v3: (Tvrtko Ursulin)
 * Added hang detector. (Chris Wilson)

v4: (Tvrtko Ursulin)
 * Rebase for hang detector moved to previous patch.
 * Tidy curly braces.

v5: (Tvrtko Ursulin)
 * Whitespace tidy. (Joonas)

Signed-off-by: Tony Ye <tony.ye@intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Tony Ye <tony.ye@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 lib/gpu_cmds.c             | 12 ++++++++
 lib/gpu_cmds.h             |  3 ++
 lib/media_fill.c           |  2 +-
 tests/i915/gem_media_vme.c | 60 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/lib/gpu_cmds.c b/lib/gpu_cmds.c
index b490a63bdfef..8d270ee86229 100644
--- a/lib/gpu_cmds.c
+++ b/lib/gpu_cmds.c
@@ -36,6 +36,18 @@ gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
 	igt_assert(ret == 0);
 }
 
+void
+gen7_render_context_flush(struct intel_batchbuffer *batch, uint32_t batch_end)
+{
+	int ret;
+
+	ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+	if (ret == 0)
+		ret = drm_intel_gem_bo_context_exec(batch->bo, batch->ctx,
+				batch_end, 0);
+	igt_assert(ret == 0);
+}
+
 uint32_t
 gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
 			    uint8_t color)
diff --git a/lib/gpu_cmds.h b/lib/gpu_cmds.h
index ca671fb52daf..1321af446161 100644
--- a/lib/gpu_cmds.h
+++ b/lib/gpu_cmds.h
@@ -40,6 +40,9 @@
 void
 gen7_render_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
 
+void
+gen7_render_context_flush(struct intel_batchbuffer *batch, uint32_t batch_end);
+
 uint32_t
 gen7_fill_curbe_buffer_data(struct intel_batchbuffer *batch,
 			uint8_t color);
diff --git a/lib/media_fill.c b/lib/media_fill.c
index b1e84727394a..03b5e71e101b 100644
--- a/lib/media_fill.c
+++ b/lib/media_fill.c
@@ -338,7 +338,7 @@ __gen11_media_vme_func(struct intel_batchbuffer *batch,
 	batch_end = intel_batchbuffer_align(batch, 8);
 	assert(batch_end < BATCH_STATE_SPLIT);
 
-	gen7_render_flush(batch, batch_end);
+	gen7_render_context_flush(batch, batch_end);
 	intel_batchbuffer_reset(batch);
 }
 
diff --git a/tests/i915/gem_media_vme.c b/tests/i915/gem_media_vme.c
index 8d9fd822b2ee..d5045ad1d785 100644
--- a/tests/i915/gem_media_vme.c
+++ b/tests/i915/gem_media_vme.c
@@ -81,6 +81,52 @@ static void scratch_buf_init_dst(drm_intel_bufmgr *bufmgr, struct igt_buf *buf)
 	buf->stride = 1;
 }
 
+static uint64_t switch_off_n_bits(uint64_t mask, unsigned int n)
+{
+	unsigned int i;
+
+	igt_assert(n > 0 && n <= (sizeof(mask) * 8));
+	igt_assert(n <= __builtin_popcount(mask));
+
+	for (i = 0; n && i < (sizeof(mask) * 8); i++) {
+		uint64_t bit = 1ULL << i;
+
+		if (bit & mask) {
+			mask &= ~bit;
+			n--;
+		}
+	}
+
+	return mask;
+}
+
+static void shut_non_vme_subslices(int drm_fd, uint32_t ctx)
+{
+	struct drm_i915_gem_context_param_sseu sseu = { };
+	struct drm_i915_gem_context_param arg = {
+		.param = I915_CONTEXT_PARAM_SSEU,
+		.ctx_id = ctx,
+		.size = sizeof(sseu),
+		.value = to_user_pointer(&sseu),
+	};
+	int ret;
+
+	if (__gem_context_get_param(drm_fd, &arg))
+		return; /* no sseu support */
+
+	ret = __gem_context_set_param(drm_fd, &arg);
+	igt_assert(ret == 0 || ret == -ENODEV || ret == -EINVAL);
+	if (ret)
+		return; /* no sseu support */
+
+	/* shutdown half subslices */
+	sseu.subslice_mask =
+		switch_off_n_bits(sseu.subslice_mask,
+				  __builtin_popcount(sseu.subslice_mask) / 2);
+
+	gem_context_set_param(drm_fd, &arg);
+}
+
 igt_simple_main
 {
 	int drm_fd;
@@ -107,6 +153,20 @@ igt_simple_main
 	scratch_buf_init_src(bufmgr, &src);
 	scratch_buf_init_dst(bufmgr, &dst);
 
+	batch->ctx = drm_intel_gem_context_create(bufmgr);
+	igt_assert(batch->ctx);
+
+	/* ICL hangs if non-VME enabled slices are enabled with a VME kernel. */
+	if (intel_gen(devid) == 11) {
+		uint32_t ctx_id;
+		int ret;
+
+		ret = drm_intel_gem_context_get_id(batch->ctx, &ctx_id);
+		igt_assert_eq(ret, 0);
+
+		shut_non_vme_subslices(drm_fd, ctx_id);
+	}
+
 	igt_fork_hang_detector(drm_fd);
 
 	media_vme(batch, &src, WIDTH, HEIGHT, &dst);
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH i-g-t 1/4] headers: bump
  2019-01-17 11:01 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
@ 2019-01-17 11:01 ` Tvrtko Ursulin
  0 siblings, 0 replies; 10+ messages in thread
From: Tvrtko Ursulin @ 2019-01-17 11:01 UTC (permalink / raw)
  To: igt-dev; +Cc: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Sync with latest drm headers from drm-tip.

Acked-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 include/drm-uapi/drm_fourcc.h | 25 +++++++++++++-
 include/drm-uapi/drm_mode.h   | 19 +++++++++++
 include/drm-uapi/i915_drm.h   | 64 +++++++++++++++++++++++++++++++++++
 include/drm-uapi/msm_drm.h    | 25 ++++++++++----
 include/drm-uapi/v3d_drm.h    | 33 ++++++++++++++++++
 5 files changed, 159 insertions(+), 7 deletions(-)

diff --git a/include/drm-uapi/drm_fourcc.h b/include/drm-uapi/drm_fourcc.h
index 4ddf754bab09..41106c835747 100644
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -151,7 +151,7 @@ extern "C" {
 #define DRM_FORMAT_VYUY		fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
 
 #define DRM_FORMAT_AYUV		fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
-#define DRM_FORMAT_XYUV8888	fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XYUV8888		fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
 
 /*
  * packed YCbCr420 2x2 tiled formats
@@ -581,10 +581,18 @@ extern "C" {
  * Indicates the superblock size(s) used for the AFBC buffer. The buffer
  * size (in pixels) must be aligned to a multiple of the superblock size.
  * Four lowest significant bits(LSBs) are reserved for block size.
+ *
+ * Where one superblock size is specified, it applies to all planes of the
+ * buffer (e.g. 16x16, 32x8). When multiple superblock sizes are specified,
+ * the first applies to the Luma plane and the second applies to the Chroma
+ * plane(s). e.g. (32x8_64x4 means 32x8 Luma, with 64x4 Chroma).
+ * Multiple superblock sizes are only valid for multi-plane YCbCr formats.
  */
 #define AFBC_FORMAT_MOD_BLOCK_SIZE_MASK      0xf
 #define AFBC_FORMAT_MOD_BLOCK_SIZE_16x16     (1ULL)
 #define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8      (2ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_64x4      (3ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4 (4ULL)
 
 /*
  * AFBC lossless colorspace transform
@@ -644,6 +652,21 @@ extern "C" {
  */
 #define AFBC_FORMAT_MOD_SC      (1ULL <<  9)
 
+/*
+ * AFBC double-buffer
+ *
+ * Indicates that the buffer is allocated in a layout safe for front-buffer
+ * rendering.
+ */
+#define AFBC_FORMAT_MOD_DB      (1ULL << 10)
+
+/*
+ * AFBC buffer content hints
+ *
+ * Indicates that the buffer includes per-superblock content hints.
+ */
+#define AFBC_FORMAT_MOD_BCH     (1ULL << 11)
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/drm_mode.h b/include/drm-uapi/drm_mode.h
index d3e0fe31efc5..a439c2e67896 100644
--- a/include/drm-uapi/drm_mode.h
+++ b/include/drm-uapi/drm_mode.h
@@ -888,6 +888,25 @@ struct drm_mode_revoke_lease {
 	__u32 lessee_id;
 };
 
+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * With drm subsystem using struct drm_rect to manage rectangular area this
+ * export it to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+	__s32 x1;
+	__s32 y1;
+	__s32 x2;
+	__s32 y2;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index e39b26d4bb3d..d2792ab3640b 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };
 
+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reason to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by configuring SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting such a
+ * combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 engine_class;
+	__u16 engine_instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be inferior or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
diff --git a/include/drm-uapi/msm_drm.h b/include/drm-uapi/msm_drm.h
index c06d0a5bdd80..91a16b333c69 100644
--- a/include/drm-uapi/msm_drm.h
+++ b/include/drm-uapi/msm_drm.h
@@ -105,14 +105,24 @@ struct drm_msm_gem_new {
 	__u32 handle;         /* out */
 };
 
-#define MSM_INFO_IOVA	0x01
-
-#define MSM_INFO_FLAGS (MSM_INFO_IOVA)
+/* Get or set GEM buffer info.  The requested value can be passed
+ * directly in 'value', or for data larger than 64b 'value' is a
+ * pointer to userspace buffer, with 'len' specifying the number of
+ * bytes copied into that buffer.  For info returned by pointer,
+ * calling the GEM_INFO ioctl with null 'value' will return the
+ * required buffer size in 'len'
+ */
+#define MSM_INFO_GET_OFFSET	0x00   /* get mmap() offset, returned by value */
+#define MSM_INFO_GET_IOVA	0x01   /* get iova, returned by value */
+#define MSM_INFO_SET_NAME	0x02   /* set the debug name (by pointer) */
+#define MSM_INFO_GET_NAME	0x03   /* get debug name, returned by pointer */
 
 struct drm_msm_gem_info {
 	__u32 handle;         /* in */
-	__u32 flags;	      /* in - combination of MSM_INFO_* flags */
-	__u64 offset;         /* out, mmap() offset or iova */
+	__u32 info;           /* in - one of MSM_INFO_* */
+	__u64 value;          /* in or out */
+	__u32 len;            /* in or out */
+	__u32 pad;
 };
 
 #define MSM_PREP_READ        0x01
@@ -188,8 +198,11 @@ struct drm_msm_gem_submit_cmd {
  */
 #define MSM_SUBMIT_BO_READ             0x0001
 #define MSM_SUBMIT_BO_WRITE            0x0002
+#define MSM_SUBMIT_BO_DUMP             0x0004
 
-#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
+#define MSM_SUBMIT_BO_FLAGS            (MSM_SUBMIT_BO_READ | \
+					MSM_SUBMIT_BO_WRITE | \
+					MSM_SUBMIT_BO_DUMP)
 
 struct drm_msm_gem_submit_bo {
 	__u32 flags;          /* in, mask of MSM_SUBMIT_BO_x */
diff --git a/include/drm-uapi/v3d_drm.h b/include/drm-uapi/v3d_drm.h
index b1e5de076b0f..ea70669d2138 100644
--- a/include/drm-uapi/v3d_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -36,6 +36,7 @@ extern "C" {
 #define DRM_V3D_MMAP_BO                           0x03
 #define DRM_V3D_GET_PARAM                         0x04
 #define DRM_V3D_GET_BO_OFFSET                     0x05
+#define DRM_V3D_SUBMIT_TFU                        0x06
 
 #define DRM_IOCTL_V3D_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
@@ -43,6 +44,7 @@ extern "C" {
 #define DRM_IOCTL_V3D_MMAP_BO             DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo)
 #define DRM_IOCTL_V3D_GET_PARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param)
 #define DRM_IOCTL_V3D_GET_BO_OFFSET       DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset)
+#define DRM_IOCTL_V3D_SUBMIT_TFU          DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_TFU, struct drm_v3d_submit_tfu)
 
 /**
  * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D
@@ -50,6 +52,14 @@ extern "C" {
  *
  * This asks the kernel to have the GPU execute an optional binner
  * command list, and a render command list.
+ *
+ * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
+ * each CL executes.  The VCD cache should be flushed (if necessary)
+ * by the submitted CLs.  The TLB writes are guaranteed to have been
+ * flushed by the time the render done IRQ happens, which is the
+ * trigger for out_sync.  Any dirtying of cachelines by the job (only
+ * possible using TMU writes) must be flushed by the caller using the
+ * CL's cache flush commands.
  */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
@@ -179,6 +189,7 @@ enum drm_v3d_param {
 	DRM_V3D_PARAM_V3D_CORE0_IDENT0,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT1,
 	DRM_V3D_PARAM_V3D_CORE0_IDENT2,
+	DRM_V3D_PARAM_SUPPORTS_TFU,
 };
 
 struct drm_v3d_get_param {
@@ -197,6 +208,28 @@ struct drm_v3d_get_bo_offset {
 	__u32 offset;
 };
 
+struct drm_v3d_submit_tfu {
+	__u32 icfg;
+	__u32 iia;
+	__u32 iis;
+	__u32 ica;
+	__u32 iua;
+	__u32 ioa;
+	__u32 ios;
+	__u32 coef[4];
+	/* First handle is the output BO, following are other inputs.
+	 * 0 for unused.
+	 */
+	__u32 bo_handles[4];
+	/* sync object to block on before running the TFU job.  Each TFU
+	 * job will execute in the order submitted to its FD.  Synchronization
+	 * against rendering jobs requires using sync objects.
+	 */
+	__u32 in_sync;
+	/* Sync object to signal when the TFU job is done. */
+	__u32 out_sync;
+};
+
 #if defined(__cplusplus)
 }
 #endif
-- 
2.19.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2019-01-17 11:01 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-01-14 10:29 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-14 10:29 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
2019-01-14 10:29 ` [PATCH i-g-t 2/4] tests/gem_ctx_sseu: Dynamic (sub)slice programming tests Tvrtko Ursulin
2019-01-14 10:29 ` [PATCH i-g-t 3/4] tests/gem_media_vme: Simple test to exercise the VME block Tvrtko Ursulin
2019-01-14 10:29 ` [PATCH i-g-t 4/4] tests/gem_media_vme: Shut down half of subslices to avoid gpu hang on ICL Tvrtko Ursulin
  -- strict thread matches above, loose matches on Subject: below --
2019-01-17 11:01 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-17 11:01 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
2019-01-08 15:12 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-08 15:13 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
2019-01-08 11:24 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2019-01-08 11:24 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
2018-12-13 12:06 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-12-13 12:06 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin
2018-11-13 14:36 [PATCH i-g-t 0/4] Per context dynamic (sub)slice power-gating Tvrtko Ursulin
2018-11-13 14:36 ` [PATCH i-g-t 1/4] headers: bump Tvrtko Ursulin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox