From mboxrd@z Thu Jan 1 00:00:00 1970 From: Kenneth Graunke Subject: Re: [PATCH 2/2] intel: Add .aub file output support. Date: Thu, 08 Mar 2012 11:14:27 -0800 Message-ID: <4F590513.9000005@whitecape.org> References: <1331148067-8932-1-git-send-email-eric@anholt.net> <1331148067-8932-2-git-send-email-eric@anholt.net> <20120308025120.GT17001@yliu-dev.sh.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1"; Format="flowed" Content-Transfer-Encoding: quoted-printable Return-path: Received: from homiemail-a4.g.dreamhost.com (caiajhbdcbef.dreamhost.com [208.97.132.145]) by gabe.freedesktop.org (Postfix) with ESMTP id 398FF9E773 for ; Thu, 8 Mar 2012 11:14:31 -0800 (PST) In-Reply-To: <20120308025120.GT17001@yliu-dev.sh.intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org To: Yuanhan Liu Cc: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On 03/07/2012 06:51 PM, Yuanhan Liu wrote: > On Wed, Mar 07, 2012 at 11:21:07AM -0800, Eric Anholt wrote: >> From: Kenneth Graunke >> >> This will allow the driver to capture all of its execution state to a >> file for later debugging. intel_gpu_dump is limited in that it only >> captures batchbuffers, and Mesa's captures, while more complete, still >> capture only a portion of the state involved in execution. >> >> It also enables us to load traces in our internal simulator. 
>> >> Signed-off-by: Eric Anholt >> Signed-off-by: Yuanhan Liu >> Signed-off-by: Kenneth Graunke >> --- >> intel/Makefile.am | 1 + >> intel/intel_aub.h | 123 ++++++++++++++++++ >> intel/intel_bufmgr.h | 14 ++ >> intel/intel_bufmgr_gem.c | 315 ++++++++++++++++++++++++++++++++++++++= ++++++++ >> 4 files changed, 453 insertions(+), 0 deletions(-) >> create mode 100644 intel/intel_aub.h >> >> diff --git a/intel/Makefile.am b/intel/Makefile.am >> index 06362b6..dc01a96 100644 >> --- a/intel/Makefile.am >> +++ b/intel/Makefile.am >> @@ -53,6 +53,7 @@ intel_bufmgr_gem_o_CFLAGS =3D $(AM_CFLAGS) -c99 >> >> libdrm_intelincludedir =3D ${includedir}/libdrm >> libdrm_intelinclude_HEADERS =3D intel_bufmgr.h \ >> + intel_aub.h \ >> intel_debug.h >> >> # This may be interesting even outside of "make check", due to the -du= mp option. >> diff --git a/intel/intel_aub.h b/intel/intel_aub.h >> new file mode 100644 >> index 0000000..a36fd53 >> --- /dev/null >> +++ b/intel/intel_aub.h >> @@ -0,0 +1,123 @@ >> +/* >> + * Copyright =A9 2010 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtainin= g a >> + * copy of this software and associated documentation files (the "Softw= are"), >> + * to deal in the Software without restriction, including without limit= ation >> + * the rights to use, copy, modify, merge, publish, distribute, sublice= nse, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the= next >> + * paragraph) shall be included in all copies or substantial portions o= f the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPR= ESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL= ITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT S= HALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR= OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARIS= ING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER = DEALINGS >> + * IN THE SOFTWARE. >> + * >> + * Authors: >> + * Eric Anholt >> + * >> + */ >> + >> +/** @file intel_aub.h >> + * >> + * The AUB file is a file format used by Intel's internal simulation >> + * and other validation tools. It can be used at various levels by a >> + * driver to input state to the simulated hardware or a replaying >> + * debugger. >> + * >> + * We choose to dump AUB files using the trace block format for ease >> + * of implementation -- dump out the blocks of memory as plain blobs >> + * and insert ring commands to execute the batchbuffer blob. >> + */ >> + >> +#ifndef _INTEL_AUB_H >> +#define _INTEL_AUB_H >> + >> +#define AUB_MI_NOOP (0) >> +#define AUB_MI_BATCH_BUFFER_START (0x31<< 23) >> +#define AUB_PIPE_CONTROL (0x7a000002) >> + >> +/* DW0: instruction type. 
*/ >> + >> +#define CMD_AUB (7<< 29) >> + >> +#define CMD_AUB_HEADER (CMD_AUB | (1<< 23) | (0x05<< 16)) >> +/* DW1 */ >> +# define AUB_HEADER_MAJOR_SHIFT 24 >> +# define AUB_HEADER_MINOR_SHIFT 16 >> + >> +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1<< 23) | (0x41<< 16)) >> +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1<< 23) | (0x9e<< 16)) >> + >> +/* DW1 */ >> +#define AUB_TRACE_OPERATION_MASK 0x000000ff >> +#define AUB_TRACE_OP_COMMENT 0x00000000 >> +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 >> +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 >> +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 >> +// operation =3D TRACE_DATA_WRITE, Type >> +#define AUB_TRACE_TYPE_MASK 0x0000ff00 >> +#define AUB_TRACE_TYPE_NOTYPE (0<< 8) >> +#define AUB_TRACE_TYPE_BATCH (1<< 8) >> +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5<< 8) >> +#define AUB_TRACE_TYPE_2D_MAP (6<< 8) >> +#define AUB_TRACE_TYPE_CUBE_MAP (7<< 8) >> +#define AUB_TRACE_TYPE_VOLUME_MAP (9<< 8) >> +#define AUB_TRACE_TYPE_1D_MAP (10<< 8) >> +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11<< 8) >> +#define AUB_TRACE_TYPE_CONSTANT_URB (12<< 8) >> +#define AUB_TRACE_TYPE_INDEX_BUFFER (13<< 8) >> +#define AUB_TRACE_TYPE_GENERAL (14<< 8) >> +#define AUB_TRACE_TYPE_SURFACE (15<< 8) >> + >> + >> +// operation =3D TRACE_COMMAND_WRITE, Type =3D >> +#define AUB_TRACE_TYPE_RING_HWB (1<< 8) >> +#define AUB_TRACE_TYPE_RING_PRB0 (2<< 8) >> +#define AUB_TRACE_TYPE_RING_PRB1 (3<< 8) >> +#define AUB_TRACE_TYPE_RING_PRB2 (4<< 8) >> + >> +// Address space >> +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 >> +#define AUB_TRACE_MEMTYPE_GTT (0<< 16) >> +#define AUB_TRACE_MEMTYPE_LOCAL (1<< 16) >> +#define AUB_TRACE_MEMTYPE_NONLOCAL (2<< 16) >> +#define AUB_TRACE_MEMTYPE_PCI (3<< 16) >> +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4<< 16) >> + >> +/* DW2 */ >> +// operation =3D TRACE_DATA_WRITE, Type =3D TRACE_DATA_WRITE_GENERAL_ST= ATE >> +#define AUB_TRACE_GENERAL_STATE_MASK 0x000000ff >> + >> +#define AUB_TRACE_VS_STATE 0x00000001 >> +#define 
AUB_TRACE_GS_STATE 0x00000002 >> +#define AUB_TRACE_CL_STATE 0x00000003 >> +#define AUB_TRACE_SF_STATE 0x00000004 >> +#define AUB_TRACE_WM_STATE 0x00000005 >> +#define AUB_TRACE_CC_STATE 0x00000006 >> +#define AUB_TRACE_CL_VP 0x00000007 >> +#define AUB_TRACE_SF_VP 0x00000008 >> +#define AUB_TRACE_CC_VP 0x00000009 >> +#define AUB_TRACE_SAMPLER_STATE 0x0000000a >> +#define AUB_TRACE_KERNEL 0x0000000b >> +#define AUB_TRACE_SCRATCH 0x0000000c >> +#define AUB_TRACE_SDC 0x0000000d >> +#define AUB_TRACE_BLEND_STATE 0x00000016 >> +#define AUB_TRACE_DEPTH_STENCIL_STATE 0x00000017 >> + >> +// operation =3D TRACE_DATA_WRITE, Type =3D TRACE_DATA_WRITE_SURFACE_ST= ATE >> +#define AUB_TRACE_SURFACE_STATE_MASK 0x00000ff00 >> +#define AUB_TRACE_BINDING_TABLE 0x000000100 >> +#define AUB_TRACE_SURFACE_STATE 0x000000200 >> + >> +/* DW3: address */ >> +/* DW4: len */ >> + >> +#endif /* _INTEL_AUB_H */ >> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h >> index 8036031..fa6f2b8 100644 >> --- a/intel/intel_bufmgr.h >> +++ b/intel/intel_bufmgr.h >> @@ -36,6 +36,7 @@ >> >> #include >> #include >> +#include >> >> struct drm_clip_rect; >> >> @@ -84,6 +85,13 @@ struct _drm_intel_bo { >> int handle; >> }; >> >> +enum aub_dump_bmp_format { >> + AUB_DUMP_BMP_FORMAT_8BIT =3D 1, >> + AUB_DUMP_BMP_FORMAT_ARGB_4444 =3D 4, >> + AUB_DUMP_BMP_FORMAT_ARGB_0888 =3D 6, >> + AUB_DUMP_BMP_FORMAT_ARGB_8888 =3D 7, >> +}; >> + >> #define BO_ALLOC_FOR_RENDER (1<<0) >> >> drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char = *name, >> @@ -154,6 +162,12 @@ int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *= bo); >> void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); >> void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_ena= ble); >> >> +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int en= able); >> +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, >> + int x1, int y1, int width, int height, >> + enum aub_dump_bmp_format format, >> + 
int pitch, int offset); >> + >> int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc= _id); >> >> int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *tot= al); >> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c >> index ba38e50..45585f7 100644 >> --- a/intel/intel_bufmgr_gem.c >> +++ b/intel/intel_bufmgr_gem.c >> @@ -58,6 +58,7 @@ >> #include "intel_bufmgr.h" >> #include "intel_bufmgr_priv.h" >> #include "intel_chipset.h" >> +#include "intel_aub.h" >> #include "string.h" >> >> #include "i915_drm.h" >> @@ -121,6 +122,9 @@ typedef struct _drm_intel_bufmgr_gem { >> unsigned int bo_reuse : 1; >> unsigned int no_exec : 1; >> bool fenced_relocs; >> + >> + FILE *aub_file; >> + uint32_t aub_offset; >> } drm_intel_bufmgr_gem; >> >> #define DRM_INTEL_RELOC_FENCE (1<<0) >> @@ -215,6 +219,8 @@ struct _drm_intel_bo_gem { >> >> /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ >> bool mapped_cpu_write; >> + >> + uint32_t aub_offset; >> }; >> >> static unsigned int >> @@ -1715,6 +1721,247 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufm= gr_gem *bufmgr_gem) >> } >> } >> >> +static void >> +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) >> +{ >> + fwrite(&data, 1, 4, bufmgr_gem->aub_file); >> +} >> + >> +static void >> +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) >> +{ >> + fwrite(data, 1, size, bufmgr_gem->aub_file); >> +} >> + >> +static void >> +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem =3D (drm_intel_bufmgr_gem *) bo->bufm= gr; >> + drm_intel_bo_gem *bo_gem =3D (drm_intel_bo_gem *) bo; >> + uint32_t *data; >> + unsigned int i; >> + >> + data =3D malloc(bo->size); >> + drm_intel_bo_get_subdata(bo, offset, size, data); >> + >> + /* Easy mode: write out bo with no relocations */ >> + if (!bo_gem->reloc_count) { >> + aub_out_data(bufmgr_gem, data, size); >> + free(data); >> + return; >> + } >> + >> + /* 
Otherwise, handle the relocations while writing. */ >> + for (i =3D 0; i< size / 4; i++) { >> + int r; >> + for (r =3D 0; r< bo_gem->reloc_count; r++) { >> + struct drm_i915_gem_relocation_entry *reloc; >> + drm_intel_reloc_target *info; >> + >> + reloc =3D&bo_gem->relocs[r]; >> + info =3D&bo_gem->reloc_target_info[r]; >> + >> + if (reloc->offset =3D=3D offset + i * 4) { >> + drm_intel_bo_gem *target_gem; >> + uint32_t val; >> + >> + target_gem =3D (drm_intel_bo_gem *)info->bo; >> + >> + val =3D reloc->delta; >> + val +=3D target_gem->aub_offset; >> + >> + aub_out(bufmgr_gem, val); >> + data[i] =3D val; >> + break; >> + } >> + } >> + if (r =3D=3D bo_gem->reloc_count) { >> + /* no relocation, just the data */ >> + aub_out(bufmgr_gem, data[i]); >> + } >> + } >> + >> + free(data); >> +} >> + >> +static void >> +aub_bo_get_address(drm_intel_bo *bo) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem =3D (drm_intel_bufmgr_gem *) bo->bufm= gr; >> + drm_intel_bo_gem *bo_gem =3D (drm_intel_bo_gem *) bo; >> + >> + /* Give the object a graphics address in the AUB file. We >> + * don't just use the GEM object address because we do AUB >> + * dumping before execution -- we want to successfully log >> + * when the hardware might hang, and we might even want to aub >> + * capture for a driver trying to execute on a different >> + * generation of hardware by disabling the actual kernel exec >> + * call. >> + */ >> + bo_gem->aub_offset =3D bufmgr_gem->aub_offset; >> + bufmgr_gem->aub_offset +=3D bo->size; >> + /* XXX: Handle aperture overflow. 
*/ >> + assert(bufmgr_gem->aub_offset< 256 * 1024 * 1024); >> +} >> + >> +static void >> +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, >> + uint32_t offset, uint32_t size) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem =3D (drm_intel_bufmgr_gem *) bo->bufm= gr; >> + drm_intel_bo_gem *bo_gem =3D (drm_intel_bo_gem *) bo; >> + >> + aub_out(bufmgr_gem, >> + CMD_AUB_TRACE_HEADER_BLOCK | >> + (5 - 2)); >> + aub_out(bufmgr_gem, >> + AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); >> + aub_out(bufmgr_gem, subtype); >> + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); >> + aub_out(bufmgr_gem, size); >> + aub_write_bo_data(bo, offset, size); >> +} >> + >> +static void >> +aub_write_bo(drm_intel_bo *bo) >> +{ >> + uint32_t block_size; >> + uint32_t offset; >> + >> + aub_bo_get_address(bo); >> + >> + /* Break up large objects into multiple writes. Otherwise a >> + * 128kb VBO would overflow the 16 bits of size field in the >> + * packet header and everything goes badly after that. >> + */ >> + for (offset =3D 0; offset< bo->size; offset +=3D block_size) { >> + block_size =3D bo->size - offset; >> + >> + if (block_size> 8 * 4096) >> + block_size =3D 8 * 4096; >> + >> + aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, >> + offset, block_size); >> + } >> +} >> + >> +/* >> + * Make a ringbuffer on fly and dump it >> + */ >> +static void >> +aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, >> + uint32_t batch_buffer, int ring_flag) >> +{ >> + uint32_t ringbuffer[4096]; >> + int ring =3D AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ >> + int ring_count =3D 0; >> + >> + if (ring_flag =3D=3D I915_EXEC_BSD) >> + ring =3D AUB_TRACE_TYPE_RING_PRB1; >> + >> + /* Make a ring buffer to execute our batchbuffer. */ >> + memset(ringbuffer, 0, sizeof(ringbuffer)); >> + ringbuffer[ring_count++] =3D AUB_MI_BATCH_BUFFER_START; >> + ringbuffer[ring_count++] =3D batch_buffer; >> + >> + /* Write out the ring. 
This appears to trigger execution of >> + * the ring in the simulator. >> + */ >> + aub_out(bufmgr_gem, >> + CMD_AUB_TRACE_HEADER_BLOCK | >> + (5 - 2)); >> + aub_out(bufmgr_gem, >> + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); >> + aub_out(bufmgr_gem, 0); /* general/surface subtype */ >> + aub_out(bufmgr_gem, bufmgr_gem->aub_offset); >> + aub_out(bufmgr_gem, ring_count * 4); >> + >> + /* FIXME: Need some flush operations here? */ >> + aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); >> + >> + /* Update offset pointer */ >> + bufmgr_gem->aub_offset +=3D 4096; >> +} >> + >> +void >> +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, >> + int x1, int y1, int width, int height, >> + enum aub_dump_bmp_format format, >> + int pitch, int offset) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem =3D (drm_intel_bufmgr_gem *) bo->bufm= gr; >> + drm_intel_bo_gem *bo_gem =3D (drm_intel_bo_gem *)bo; >> + uint32_t cpp; >> + >> + switch (format) { >> + case AUB_DUMP_BMP_FORMAT_8BIT: >> + cpp =3D 1; >> + break; >> + case AUB_DUMP_BMP_FORMAT_ARGB_4444: >> + cpp =3D 2; >> + break; >> + case AUB_DUMP_BMP_FORMAT_ARGB_0888: >> + case AUB_DUMP_BMP_FORMAT_ARGB_8888: >> + cpp =3D 4; >> + break; >> + default: >> + printf("Unknown AUB dump format %d\n", format); >> + return; >> + } >> + >> + if (!bufmgr_gem->aub_file) >> + return; >> + >> + aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); >> + aub_out(bufmgr_gem, (y1<< 16) | x1); >> + aub_out(bufmgr_gem, >> + (format<< 24) | >> + (cpp<< 19) | >> + pitch / 4); >> + aub_out(bufmgr_gem, (height<< 16) | width); >> + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); >> + aub_out(bufmgr_gem, >> + ((bo_gem->tiling_mode !=3D I915_TILING_NONE) ? (1<< 2) : 0) | >> + ((bo_gem->tiling_mode =3D=3D I915_TILING_Y) ? 
(1<< 3) : 0)); >> +} >> + >> +static void >> +aub_exec(drm_intel_bo *bo, int ring_flag, int used) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem =3D (drm_intel_bufmgr_gem *) bo->bufm= gr; >> + drm_intel_bo_gem *bo_gem =3D (drm_intel_bo_gem *) bo; >> + int i; >> + >> + if (!bufmgr_gem->aub_file) >> + return; >> + >> + /* Write out all but the batchbuffer to AUB memory */ >> + for (i =3D 0; i< bufmgr_gem->exec_count - 1; i++) { >> + if (bufmgr_gem->exec_bos[i] !=3D bo) >> + aub_write_bo(bufmgr_gem->exec_bos[i]); >> + } >> + >> + aub_bo_get_address(bo); >> + >> + /* Dump the batchbuffer. */ >> + aub_write_trace_block(bo, AUB_TRACE_TYPE_BATCH, 0, >> + 0, used); >> + aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, >> + used, bo->size - used); >> + >> + /* Dump ring buffer */ >> + aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); >> + >> + fflush(bufmgr_gem->aub_file); >> + >> + /* >> + * One frame has been dumped. So reset the aub_offset for the next fra= me. >> + * >> + * FIXME: Can we do this? >> + */ >> + bufmgr_gem->aub_offset =3D 0x10000; >> +} >> + >> static int >> drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, >> drm_clip_rect_t * cliprects, int num_cliprects, int DR4) >> @@ -1830,6 +2077,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int u= sed, >> execbuf.rsvd1 =3D 0; >> execbuf.rsvd2 =3D 0; >> >> + aub_exec(bo, flags, used); >> + >> if (bufmgr_gem->no_exec) >> goto skip_execution; >> >> @@ -2360,6 +2609,72 @@ drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *= bufmgr) >> } >> >> /** >> + * Sets up AUB dumping. >> + * >> + * This is a trace file format that can be used with the simulator. >> + * Packets are emitted in a format somewhat like GPU command packets. >> + * You can set up a GTT and upload your objects into the referenced >> + * space, then send off batchbuffers and get BMPs out the other end. 
>> + */ >> +void >> +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; >> + int entry = 0x200003; >> + int i; >> + int gtt_size = 0x10000; >> + >> + if (!enable) { >> + if (bufmgr_gem->aub_file) { >> + fclose(bufmgr_gem->aub_file); >> + bufmgr_gem->aub_file = NULL; >> + } >> + } >> + >> + if (geteuid() != getuid()) >> + return; >> + >> + bufmgr_gem->aub_file = fopen("intel.aub", "w+"); > > I guess it's would be better that we can name the aub dump file > according to the program we are tracing, like if we run: > $ INTEL_DEBUG=aub glxgears > > I guess it would be good if we get a glxgears.aub but not intel.aub. > Otherwise, it would override the former one we dumped. It's somehow a > little un-convenient. That would be a nice feature, though I'm okay with intel.aub for now. >> + if (!bufmgr_gem->aub_file) >> + return; >> + >> + /* Start allocating objects from just after the GTT. */ >> + bufmgr_gem->aub_offset = gtt_size; >> + >> + /* Start with a (required) version packet. */ >> + aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); >> + aub_out(bufmgr_gem, >> + (4<< AUB_HEADER_MAJOR_SHIFT) | >> + (0<< AUB_HEADER_MINOR_SHIFT)); >> + for (i = 0; i< 8; i++) { >> + aub_out(bufmgr_gem, 0); /* app name */ >> + } >> + aub_out(bufmgr_gem, 0); /* timestamp */ >> + aub_out(bufmgr_gem, 0); /* timestamp */ >> + aub_out(bufmgr_gem, 0); /* comment len */ >> + >> + /* Set up the GTT. 
The max we can handle is 256M */ >> + aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2)); >> + aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE); >> + aub_out(bufmgr_gem, 0); /* subtype */ >> + aub_out(bufmgr_gem, 0); /* offset */ >> + aub_out(bufmgr_gem, gtt_size); /* size */ >> + for (i = 0x000; i< gtt_size; i += 4, entry += 0x1000) { >> + aub_out(bufmgr_gem, entry); >> + } >> + >> + /* MI_FLUSH enable */ >> + if (bufmgr_gem->gen>= 6) { >> + aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2)); >> + aub_out(bufmgr_gem, AUB_TRACE_OP_MMIO_WRITE); >> + aub_out(bufmgr_gem, 0); >> + aub_out(bufmgr_gem, 0x209c); /* reg addr */ >> + aub_out(bufmgr_gem, 0x04); /* Length in byte */ >> + aub_out(bufmgr_gem, ((1<< 12)<< 16) | (1<< 12)); >> + } > > Zhenyu and me came to an agreement that it's the driver side to do the > MI_FLUSH enable stuff. Since using MI_FLUSH is deprecated, and if you still > use it, it is your job to set up the MI_FLUSH enable bit. Oh...yeah, we should definitely drop that. >> +} >> + >> +/** >> * Initializes the GEM buffer manager, which uses the kernel to allocate, map, >> * and manage map buffer objections. >> * >> -- >> 1.7.9.1 Otherwise, Reviewed-by: Kenneth Graunke Please set yourself as the author, though - I just squashed this and fixed up a few minor things.