public inbox for igt-dev@lists.freedesktop.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
@ 2018-07-25 21:38 Chris Wilson
  2018-08-01 14:47 ` Katarzyna Dec
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2018-07-25 21:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

The aim of this test is to combine gem_linear_blits, gem_tiled_blits etc
into one test runner that covers investigation into HW alignment issues
as well as driver boundaries (relocs, access, thrashing).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Katarzyna Dec <katarzyna.dec@intel.com>
---
 tests/Makefile.sources |   1 +
 tests/gem_blits.c      | 753 +++++++++++++++++++++++++++++++++++++++++
 tests/meson.build      |   1 +
 3 files changed, 755 insertions(+)
 create mode 100644 tests/gem_blits.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c84933f1d..564545fb7 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -41,6 +41,7 @@ TESTS_progs = \
 	drv_suspend \
 	gem_bad_reloc \
 	gem_basic \
+	gem_blits \
 	gem_busy \
 	gem_caching \
 	gem_close \
diff --git a/tests/gem_blits.c b/tests/gem_blits.c
new file mode 100644
index 000000000..44da775a4
--- /dev/null
+++ b/tests/gem_blits.c
@@ -0,0 +1,753 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_x86.h"
+
+#define MI_FLUSH_DW (0x26 << 23) /* blitter flush command (gen6+ BLT ring) */
+
+#define BCS_SWCTRL 0x22200 /* BLT engine tiling control register */
+#define BCS_SRC_Y (1 << 0) /* treat the blit source as Y-tiled */
+#define BCS_DST_Y (1 << 1) /* treat the blit destination as Y-tiled */
+
+/* The device under test, filled in once in the igt_fixture. */
+struct device {
+	int fd;		/* drm fd of the opened render node */
+	int gen;	/* graphics generation, from intel_gen() */
+	int pciid;	/* PCI device id, from intel_get_drm_devid() */
+	int llc;	/* non-zero if the gpu shares a last-level cache (gem_has_llc) */
+};
+
+/*
+ * A GEM buffer under test plus a malloc'ed "model" of its expected
+ * contents; every blit is applied to both so the HW result can be
+ * checked against the model.
+ */
+struct buffer {
+	uint32_t handle;	/* GEM handle (replaced by buffer_set_tiling) */
+	uint16_t width;		/* in pixels, 32bpp */
+	uint16_t height;	/* in rows */
+	uint16_t stride;	/* in bytes, per current tiling requirements */
+	uint32_t size;		/* object size in bytes, page aligned */
+	unsigned int tiling;	/* current I915_TILING_* mode */
+	unsigned int caching;	/* initialised from device->llc in buffer_create */
+	uint64_t gtt_offset;	/* last known GTT address, used to seed relocs */
+	/* CPU reference copy, rows packed at 4*width bytes */
+	uint32_t model[] __attribute__((aligned(16)));
+};
+
+/*
+ * Find last set bit: returns the 1-based index of the most
+ * significant set bit of x, or 0 when x == 0.  Used to round strides
+ * up to a power of two via 1 << fls(stride - 1).
+ * (Assumes the top bit of x is clear so the shift count never
+ * reaches the width of the type.)
+ */
+static int fls(uint64_t x)
+{
+	int t;
+
+	for (t = 0; x >> t; t++)
+		;
+
+	return t;
+}
+
+/*
+ * Compute the stride in bytes required for a 32bpp surface of @width
+ * pixels with the given @tiling: tile-width aligned for tiled
+ * surfaces (128 bytes on gen2 and for gen4+ Y, 512 bytes on gen3 and
+ * for X-tiling), and cacheline (64 byte) aligned for linear surfaces
+ * on gen8+.
+ */
+static unsigned int
+get_tiling_stride(const struct device *device,
+		  unsigned int width, unsigned int tiling)
+{
+	unsigned int stride = 4u * width;
+
+	if (tiling) {
+		if (device->gen < 3)
+			stride = ALIGN(stride, 128);
+		else if (device->gen < 4 || tiling == I915_TILING_X)
+			stride = ALIGN(stride, 512);
+		else
+			stride = ALIGN(stride, 128);
+		/* pre-gen4 fences also require a power-of-two stride */
+		if (device->gen < 4)
+			stride = 1 << fls(stride - 1);
+	} else {
+		if (device->gen >= 8)
+			stride = ALIGN(stride, 64);
+	}
+
+	igt_assert(stride < UINT16_MAX && stride >= 4*width);
+	return stride;
+}
+
+/*
+ * Round @height up to a whole number of tile rows: gen2 tiles are 16
+ * rows tall, gen3 tiles and X-tiles are 8 rows, gen4+ Y-tiles are 32
+ * rows.  Linear surfaces need no alignment.
+ */
+static unsigned int
+get_tiling_height(const struct device *device,
+		  unsigned int height, unsigned int tiling)
+{
+	if (!tiling)
+		return height;
+
+	if (device->gen < 3)
+		return ALIGN(height, 16);
+	else if (device->gen < 4 || tiling == I915_TILING_X)
+		return ALIGN(height, 8);
+	else
+		return ALIGN(height, 32);
+}
+
+/*
+ * Allocate a @width x @height 32bpp linear GEM buffer together with a
+ * CPU-side model of its contents.  Both the object and the model are
+ * filled with the same pattern, each pixel encoding its (x, y)
+ * position xor'ed with the buffer handle so every pixel of every
+ * buffer is distinct.  Returns NULL if the model allocation fails.
+ */
+static struct buffer *buffer_create(const struct device *device,
+				    unsigned int width,
+				    unsigned int height)
+{
+	struct buffer *buffer;
+
+	igt_assert(width && height);
+
+	buffer = calloc(1, sizeof(*buffer) + 4u * width * height);
+	if (!buffer)
+		return NULL;
+
+	buffer->width = width;
+	buffer->height = height;
+
+	buffer->stride = get_tiling_stride(device, width, I915_TILING_NONE);
+	buffer->size = ALIGN(buffer->stride * height, 4096);
+	buffer->handle = gem_create(device->fd, buffer->size);
+	buffer->caching = device->llc;
+
+	/* NOTE(review): rows are uploaded packed at 4*width bytes, not
+	 * at buffer->stride; the two only agree when the stride needed
+	 * no extra alignment (true for the widths used by the subtests)
+	 * -- confirm for new callers.
+	 */
+	for (int y = 0; y < height; y++) {
+		uint32_t *row = buffer->model + y * width;
+
+		for (int x = 0; x < width; x++)
+			row[x] = (y << 16 | x) ^ buffer->handle;
+
+		gem_write(device->fd,
+			  buffer->handle, 4u * y * width,
+			  row, 4u * width);
+	}
+
+	return buffer;
+}
+
+/*
+ * Change the tiling of @buffer by creating a new GEM object with the
+ * requested @tiling/stride and blitting the pixels across with
+ * XY_SRC_COPY, then swapping the new object into @buffer (closing the
+ * old one).  The CPU model is untouched: the pixel values do not
+ * change, only their in-memory layout.
+ */
+static void buffer_set_tiling(const struct device *device,
+			      struct buffer *buffer,
+			      unsigned int tiling)
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t stride, size, pitch;
+	uint32_t *batch;
+	int i;
+
+	if (buffer->tiling == tiling)
+		return;
+
+	stride = get_tiling_stride(device, buffer->width, tiling);
+	size = stride * get_tiling_height(device, buffer->height, tiling);
+	size = ALIGN(size, 4096);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	/* obj[0]: the new destination, obj[1]: the old buffer,
+	 * obj[2]: the batch.
+	 */
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(device->fd, size);
+	if (tiling) {
+		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+		gem_set_tiling(device->fd, obj[0].handle, tiling, stride);
+	}
+
+	obj[1].handle = buffer->handle;
+	obj[1].offset = buffer->gtt_offset;
+	if (buffer->tiling)
+		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(memset(reloc, 0, sizeof(reloc)));
+	obj[2].relocation_count = 2;
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	i = 0;
+
+	/* Gen6+ selects Y-tiling for the BLT engine through BCS_SWCTRL
+	 * rather than per-blit command bits.
+	 */
+	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (buffer->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		if (tiling == I915_TILING_Y)
+			mask |= BCS_DST_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	batch[i] |= 6 + 2 * has_64b_reloc; /* command length in dwords */
+	if (device->gen >= 4 && buffer->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+	if (device->gen >= 4 && tiling)
+		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
+	i++;
+
+	/* gen4+ expects the pitch of tiled surfaces in dwords */
+	pitch = stride;
+	if (device->gen >= 4 && tiling)
+		pitch /= 4;
+	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
+	batch[i++] = 0;
+	batch[i++] = buffer->height << 16 | buffer->width;
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = 0;
+	pitch = buffer->stride;
+	if (device->gen >= 4 && buffer->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	/* Flush the blit, then restore the default BCS_SWCTRL state
+	 * (both Y bits cleared) for subsequent batches.
+	 */
+	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+
+	gem_close(device->fd, obj[2].handle);
+	gem_close(device->fd, obj[1].handle);
+
+	/* Adopt the freshly tiled object in place of the old one. */
+	buffer->gtt_offset = obj[0].offset;
+	buffer->handle = obj[0].handle;
+
+	buffer->tiling = tiling;
+	buffer->stride = stride;
+	buffer->size = size;
+}
+
+/* Method used by download() to read a buffer back for verification. */
+enum mode {
+	CPU,	/* cpu mmap (or a detiling blit for tiled buffers) */
+	PRW,	/* pread */
+	GTT,	/* mmap through the GTT */
+	WC,	/* write-combining mmap */
+};
+
+/*
+ * Detile @buffer into the page-aligned allocation @linear (at least
+ * buffer->size bytes) by wrapping @linear in a userptr object and
+ * blitting the tiled buffer into it.  Synchronous: waits for the blit
+ * to complete before returning.  Only valid for tiled buffers.
+ */
+static void blit_to_linear(const struct device *device,
+			   const struct buffer *buffer,
+			   void *linear)
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t *batch;
+	uint32_t pitch;
+	int i = 0;
+
+	igt_assert(buffer->tiling);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	/* obj[0]: the malloc'ed memory wrapped as the blit destination */
+	memset(obj, 0, sizeof(obj));
+	gem_userptr(device->fd, linear, buffer->size, 0, 0, &obj[0].handle);
+	obj[1].handle = buffer->handle;
+	obj[1].offset = buffer->gtt_offset;
+	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	memset(reloc, 0, sizeof(reloc));
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(reloc);
+	obj[2].relocation_count = ARRAY_SIZE(reloc);
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	/* Gen6+: Y-tiling on the BLT engine is selected via BCS_SWCTRL. */
+	if (buffer->tiling >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (buffer->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	if (device->gen >= 4 && buffer->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+
+	/* command length: 6 dwords plus 2 for 64b relocations */
+	batch[i++] |= 6 + 2 * has_64b_reloc;
+
+	batch[i++] = 3 << 24 | 0xcc << 16 | buffer->stride;
+	batch[i++] = 0;
+	batch[i++] = buffer->height << 16 | buffer->width;
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = 0;
+	/* gen4+ expects the pitch of tiled surfaces in dwords */
+	pitch = buffer->stride;
+	if (device->gen >= 4 && buffer->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	/* Flush, then restore the default tiling mode for later users. */
+	if (buffer->tiling >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+	gem_close(device->fd, obj[2].handle);
+
+	/* wait for the blit so the caller may read @linear immediately */
+	gem_sync(device->fd, obj[0].handle);
+	gem_close(device->fd, obj[0].handle);
+}
+
+/*
+ * Read the buffer back into a freshly allocated, page-aligned copy,
+ * using the requested access @mode where that mode can produce a
+ * coherent linear view, and falling back to a detiling blit or the
+ * GTT otherwise.  The caller frees the returned pointer.
+ */
+static void *download(const struct device *device,
+		      const struct buffer *buffer,
+		      enum mode mode)
+{
+	void *linear, *src;
+
+	igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
+
+	/* First pass: demote the requested mode to one that works. */
+	switch (mode) {
+	case CPU:
+		if (buffer->tiling) {
+			/* a CPU mmap cannot detile: blit to linear if
+			 * coherent, otherwise go through the GTT
+			 */
+			if (buffer->caching && !device->llc) {
+				mode = GTT;
+				break;
+			}
+			if (device->gen < 3) {
+				mode = GTT;
+				break;
+			}
+
+			blit_to_linear(device, buffer, linear);
+			return linear;
+		}
+		break;
+
+	case PRW:
+	case WC:
+		if (!buffer->tiling)
+			break;
+		/* fallthrough - pread/wc see raw tiled bytes, use GTT */
+
+	default:
+		mode = GTT;
+		break;
+	}
+
+	switch (mode) {
+	case CPU:
+		src = gem_mmap__cpu(device->fd, buffer->handle,
+				    0, buffer->size,
+				    PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_CPU, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case WC:
+		src = gem_mmap__wc(device->fd, buffer->handle,
+				   0, buffer->size,
+				   PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_WC, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case GTT:
+		src = gem_mmap__gtt(device->fd, buffer->handle,
+				   buffer->size,
+				   PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_GTT, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case PRW:
+		gem_read(device->fd, buffer->handle, 0, linear, buffer->size);
+		break;
+	}
+
+	return linear;
+}
+
+/*
+ * Download the buffer via @mode and compare it pixel by pixel against
+ * the CPU model, warning about the first few mismatches.  Returns
+ * true only if every pixel matches.
+ */
+static bool buffer_check(const struct device *device,
+			 const struct buffer *buffer,
+			 enum mode mode)
+{
+	unsigned int num_errors = 0;
+	uint32_t *linear;
+
+	linear = download(device, buffer, mode);
+	igt_assert(linear);
+
+	for (int y = 0; y < buffer->height; y++) {
+		const uint32_t *model = buffer->model + y * buffer->width;
+		/* downloaded rows use the GEM stride; model rows 4*width */
+		const uint32_t *row =
+			linear + y * buffer->stride / sizeof(uint32_t);
+
+		for (int x = 0; x < buffer->width; x++) {
+			if (row[x] != model[x] && num_errors++ < 5) {
+				igt_warn("buffer handle=%d mismatch at (%d, %d): expected %08x, found %08x\n",
+					 buffer->handle,
+					 x, y, model[x], row[x]);
+			}
+		}
+	}
+
+	free(linear);
+
+	return num_errors == 0;
+}
+
+/* Verify the buffer one final time (via the GTT), then release it. */
+static void buffer_free(const struct device *device, struct buffer *buffer)
+{
+	igt_assert(buffer_check(device, buffer, GTT));
+	gem_close(device->fd, buffer->handle);
+	free(buffer);
+}
+
+/*
+ * CPU implementation of a 2D copy between two 32bpp linear surfaces,
+ * used to apply every GPU blit to the model buffers as well.  Strides
+ * are in bytes; positions, width and height in pixels.  @height must
+ * be at least 1 (the copy loops are do/while).
+ */
+static void memcpy_blt(const void *src, void *dst,
+		       uint32_t src_stride, uint32_t dst_stride,
+		       uint16_t src_x, uint16_t src_y,
+		       uint16_t dst_x, uint16_t dst_y,
+		       uint16_t width, uint16_t height)
+{
+	const uint8_t *src_bytes;
+	uint8_t *dst_bytes;
+	int byte_width;
+
+	src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * 4;
+	dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * 4;
+
+	/* full-width rows on both sides: collapse into a single copy */
+	byte_width = width * 4;
+	if (byte_width == src_stride && byte_width == dst_stride) {
+		byte_width *= height;
+		height = 1;
+	}
+
+	/* fixed-size fast paths for narrow blits */
+	switch (byte_width) {
+	case 4:
+		do {
+			*(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes;
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+
+	case 8:
+		do {
+			*(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes;
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+	case 16:
+		do {
+			((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
+			((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+
+	default:
+		do {
+			memcpy(dst_bytes, src_bytes, byte_width);
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+	}
+}
+
+/*
+ * Copy a width x height rectangle of pixels from (src_x, src_y) in
+ * @src to (dst_x, dst_y) in @dst with an XY_SRC_COPY blit, and apply
+ * the identical copy to the CPU-side models so that buffer_check()
+ * can verify the HW result afterwards.
+ *
+ * Fix: the clipping code below tests the coordinates for being
+ * negative, but they were declared uint16_t and so could never be --
+ * all four clip branches were dead code.  Accept int16_t coordinates
+ * instead (existing callers only pass small non-negative values) and
+ * return early if the clipped rectangle is empty, which also keeps a
+ * zero height out of memcpy_blt()'s do/while loops.
+ */
+static void
+blit(const struct device *device,
+     struct buffer *src, int16_t src_x, int16_t src_y,
+     struct buffer *dst, int16_t dst_x, int16_t dst_y,
+     uint16_t width, uint16_t height)
+
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	int w = width, h = height; /* signed so clipping may go negative */
+	uint32_t *batch;
+	uint32_t pitch;
+	int i = 0;
+
+	/* Clip the rectangle to lie wholly within both surfaces. */
+	if (src_x < 0) {
+		w += src_x;
+		dst_x -= src_x;
+		src_x = 0;
+	}
+	if (src_y < 0) {
+		h += src_y;
+		dst_y -= src_y;
+		src_y = 0;
+	}
+
+	if (dst_x < 0) {
+		w += dst_x;
+		src_x -= dst_x;
+		dst_x = 0;
+	}
+	if (dst_y < 0) {
+		h += dst_y;
+		src_y -= dst_y;
+		dst_y = 0;
+	}
+
+	if (src_x + w > src->width)
+		w = src->width - src_x;
+	if (dst_x + w > dst->width)
+		w = dst->width - dst_x;
+
+	if (src_y + h > src->height)
+		h = src->height - src_y;
+	if (dst_y + h > dst->height)
+		h = dst->height - dst_y;
+
+	if (w <= 0 || h <= 0)
+		return; /* fully clipped, nothing to copy */
+
+	/* Snooped destinations need gen3+; a snooped source can only be
+	 * blitted coherently on LLC machines.
+	 */
+	if (dst->caching) {
+		igt_assert(device->gen >= 3);
+		igt_assert(device->llc || !src->caching);
+	}
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = dst->handle;
+	obj[0].offset = dst->gtt_offset;
+	if (dst->tiling)
+		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	obj[1].handle = src->handle;
+	obj[1].offset = src->gtt_offset;
+	if (src->tiling)
+		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	memset(reloc, 0, sizeof(reloc));
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(reloc);
+	obj[2].relocation_count = ARRAY_SIZE(reloc);
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	/* Gen6+ selects Y-tiling for the BLT engine via BCS_SWCTRL. */
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (src->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		if (dst->tiling == I915_TILING_Y)
+			mask |= BCS_DST_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	batch[i] |= 6 + 2 * has_64b_reloc; /* command length in dwords */
+	if (device->gen >= 4 && src->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+	if (device->gen >= 4 && dst->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
+	i++;
+
+	/* gen4+ expects the pitch of tiled surfaces in dwords */
+	pitch = dst->stride;
+	if (device->gen >= 4 && dst->tiling)
+		pitch /= 4;
+	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
+
+	batch[i++] = dst_y << 16 | dst_x;
+	batch[i++] = (h + dst_y) << 16 | (w + dst_x);
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = src_y << 16 | src_x;
+	pitch = src->stride;
+	if (device->gen >= 4 && src->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	/* Flush, then restore the default tiling mode for later users. */
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+	gem_close(device->fd, obj[2].handle);
+
+	/* remember where the kernel bound the objects to seed later relocs */
+	dst->gtt_offset = obj[0].offset;
+	src->gtt_offset = obj[1].offset;
+
+	/* mirror the blit in the CPU models for later verification */
+	memcpy_blt(src->model, dst->model,
+		   4u * src->width, 4u * dst->width,
+		   src_x, src_y,
+		   dst_x, dst_y,
+		   w, h);
+}
+
+igt_main
+{
+	struct device device;
+
+	igt_fixture {
+		device.fd = drm_open_driver_render(DRIVER_INTEL);
+		igt_require_gem(device.fd);
+
+		/* gen and llc determine which tilings/readbacks are legal */
+		device.pciid = intel_get_drm_devid(device.fd);
+		device.gen = intel_gen(device.pciid);
+		device.llc = gem_has_llc(device.fd);
+	}
+
+	/* For a range of buffer sizes, copy between every combination
+	 * of src/dst tiling (Y-tiling needs gen6+) and verify the
+	 * result with every readback mode, packing each copy into a
+	 * distinct (x, y) rectangle of the destination.
+	 */
+	igt_subtest("basic") {
+		struct buffer *src, *dst;
+		unsigned int x, y;
+
+		for (unsigned int height = 1; height <= 16; height <<= 1) {
+			for (unsigned int width = 1; width <= 64; width <<= 1) {
+				src = buffer_create(&device,
+						    width * 16, height * 4);
+				dst = buffer_create(&device,
+						    width * 16, height * 4);
+
+				y = 0;
+				for (unsigned int src_tiling = I915_TILING_NONE;
+				     src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
+				     src_tiling++) {
+					buffer_set_tiling(&device, src, src_tiling);
+
+					x = 0;
+					for (unsigned int dst_tiling = I915_TILING_NONE;
+					     dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
+					     dst_tiling++) {
+						buffer_set_tiling(&device, dst, dst_tiling);
+
+						for (enum mode down = CPU; down <= WC; down++) {
+							igt_debug("Testing src_tiling=%d, dst_tiling=%d, down=%d at (%d, %d) x (%d, %d)\n",
+								  src_tiling,
+								  dst_tiling,
+								  down, x, y,
+								  width, height);
+
+							igt_assert(x + width <= dst->width);
+							igt_assert(y + height <= dst->height);
+
+							blit(&device,
+							     src, x, y,
+							     dst, x, y,
+							     width, height);
+							igt_assert(buffer_check(&device, dst, down));
+
+							x += width;
+						}
+					}
+
+					y += height;
+				}
+
+				/* buffer_free() performs a final GTT check */
+				buffer_free(&device, dst);
+				buffer_free(&device, src);
+			}
+		}
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 32c2156c6..becadfa8d 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -18,6 +18,7 @@ test_progs = [
 	'drv_suspend',
 	'gem_bad_reloc',
 	'gem_basic',
+	'gem_blits',
 	'gem_busy',
 	'gem_caching',
 	'gem_close',
-- 
2.18.0

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2018-07-25 21:38 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
@ 2018-08-01 14:47 ` Katarzyna Dec
  2018-08-01 15:10   ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Katarzyna Dec @ 2018-08-01 14:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

On Wed, Jul 25, 2018 at 10:38:23PM +0100, Chris Wilson wrote:
> The aim of this test is to combine gem_linear_blits, gem_tiled_blits etc
> into one test runner that covers investigation into HW alignment issues
> as well as driver boundaries (relocs, access, thrashing).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Katarzyna Dec <katarzyna.dec@intel.com>
> ---
>  tests/Makefile.sources |   1 +
>  tests/gem_blits.c      | 753 +++++++++++++++++++++++++++++++++++++++++
>  tests/meson.build      |   1 +
>  3 files changed, 755 insertions(+)
>  create mode 100644 tests/gem_blits.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index c84933f1d..564545fb7 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -41,6 +41,7 @@ TESTS_progs = \
>  	drv_suspend \
>  	gem_bad_reloc \
>  	gem_basic \
> +	gem_blits \
>  	gem_busy \
>  	gem_caching \
>  	gem_close \
> diff --git a/tests/gem_blits.c b/tests/gem_blits.c
> new file mode 100644
> index 000000000..44da775a4
> --- /dev/null
> +++ b/tests/gem_blits.c
> @@ -0,0 +1,753 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +#include "igt_x86.h"
> +
> +#define MI_FLUSH_DW (0x26 << 23)
> +
> +#define BCS_SWCTRL 0x22200
> +#define BCS_SRC_Y (1 << 0)
> +#define BCS_DST_Y (1 << 1)
> +
> +struct device {
> +	int fd;
> +	int gen;
> +	int pciid;
> +	int llc;
> +};
> +
> +struct buffer {
> +	uint32_t handle;
> +	uint16_t width;
> +	uint16_t height;
> +	uint16_t stride;
> +	uint32_t size;
> +	unsigned int tiling;
> +	unsigned int caching;
> +	uint64_t gtt_offset;
> +	uint32_t model[] __attribute__((aligned(16)));
> +};
> +
> +static int fls(uint64_t x)
> +{
> +	int t;
> +
> +	for (t = 0; x >> t; t++)
> +		;
> +
> +	return t;
> +}
> +
> +static unsigned int
> +get_tiling_stride(const struct device *device,
> +		  unsigned int width, unsigned int tiling)
> +{
> +	unsigned int stride = 4u * width;
> +
> +	if (tiling) {
> +		if (device->gen < 3)
> +			stride = ALIGN(stride, 128);
> +		else if (device->gen < 4 || tiling == I915_TILING_X)
> +			stride = ALIGN(stride, 512);
> +		else
> +			stride = ALIGN(stride, 128);
> +		if (device->gen < 4)
> +			stride = 1 << fls(stride - 1);
Shouldn't 'else' be last here ^? What about order of 'else if' and 'if'
at the end?
> +	} else {
> +		if (device->gen >= 8)
> +			stride = ALIGN(stride, 64);
> +	}
> +
> +	igt_assert(stride < UINT16_MAX && stride >= 4*width);
> +	return stride;
> +}
> +
> +static unsigned int
> +get_tiling_height(const struct device *device,
> +		  unsigned int height, unsigned int tiling)
> +{
> +	if (!tiling)
> +		return height;
> +
> +	if (device->gen < 3)
> +		return ALIGN(height, 16);
> +	else if (device->gen < 4 || tiling == I915_TILING_X)
> +		return ALIGN(height, 8);
> +	else
> +		return ALIGN(height, 32);
> +}
> +
> +static struct buffer *buffer_create(const struct device *device,
> +				    unsigned int width,
> +				    unsigned int height)
> +{
> +	struct buffer *buffer;
> +
> +	igt_assert(width && height);
> +
> +	buffer = calloc(1, sizeof(*buffer) + 4u * width * height);
> +	if (!buffer)
> +		return NULL;
> +
> +	buffer->width = width;
> +	buffer->height = height;
> +
> +	buffer->stride = get_tiling_stride(device, width, I915_TILING_NONE);
> +	buffer->size = ALIGN(buffer->stride * height, 4096);
> +	buffer->handle = gem_create(device->fd, buffer->size);
> +	buffer->caching = device->llc;
> +
> +	for (int y = 0; y < height; y++) {
> +		uint32_t *row = buffer->model + y * width;
> +
> +		for (int x = 0; x < width; x++)
> +			row[x] = (y << 16 | x) ^ buffer->handle;
> +
> +		gem_write(device->fd,
> +			  buffer->handle, 4u * y * width,
> +			  row, 4u * width);
> +	}
> +
> +	return buffer;
> +}
> +
> +static void buffer_set_tiling(const struct device *device,
> +			      struct buffer *buffer,
> +			      unsigned int tiling)
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t stride, size, pitch;
> +	uint32_t *batch;
> +	int i;
> +
> +	if (buffer->tiling == tiling)
> +		return;
> +
> +	stride = get_tiling_stride(device, buffer->width, tiling);
> +	size = stride * get_tiling_height(device, buffer->height, tiling);
> +	size = ALIGN(size, 4096);
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(device->fd, size);
> +	if (tiling) {
> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
> +		gem_set_tiling(device->fd, obj[0].handle, tiling, stride);
> +	}
> +
> +	obj[1].handle = buffer->handle;
> +	obj[1].offset = buffer->gtt_offset;
> +	if (buffer->tiling)
> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(memset(reloc, 0, sizeof(reloc)));
> +	obj[2].relocation_count = 2;
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	i = 0;
> +
> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +

All batch creation starting from here ^ looks mysterious. Maybe we can make
the buffer_set_tiling and blit_to_linear functions clearer?
They are hard to read and understand.

> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (buffer->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		if (tiling == I915_TILING_Y)
> +			mask |= BCS_DST_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	batch[i] |= 6 + 2 * has_64b_reloc;
> +	if (device->gen >= 4 && buffer->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +	if (device->gen >= 4 && tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
> +	i++;
> +
> +	pitch = stride;
> +	if (device->gen >= 4 && tiling)
> +		pitch /= 4;
> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
> +	batch[i++] = 0;
> +	batch[i++] = buffer->height << 16 | buffer->width;
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = 0;
> +	pitch = buffer->stride;
> +	if (device->gen >= 4 && buffer->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +
> +	gem_close(device->fd, obj[2].handle);
> +	gem_close(device->fd, obj[1].handle);
> +
> +	buffer->gtt_offset = obj[0].offset;
> +	buffer->handle = obj[0].handle;
> +
> +	buffer->tiling = tiling;
> +	buffer->stride = stride;
> +	buffer->size = size;
> +}
> +
> +enum mode {
> +	CPU,
> +	PRW,
> +	GTT,
> +	WC,
> +};
> +
> +static void blit_to_linear(const struct device *device,
> +			   const struct buffer *buffer,
> +			   void *linear)
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t *batch;
> +	uint32_t pitch;
> +	int i = 0;
> +
> +	igt_assert(buffer->tiling);
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	gem_userptr(device->fd, linear, buffer->size, 0, 0, &obj[0].handle);
> +	obj[1].handle = buffer->handle;
> +	obj[1].offset = buffer->gtt_offset;
> +	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	memset(reloc, 0, sizeof(reloc));
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(reloc);
> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	if (buffer->tiling >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +
> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (buffer->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	if (device->gen >= 4 && buffer->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +
> +	batch[i++] |= 6 + 2 * has_64b_reloc;
> +
> +	batch[i++] = 3 << 24 | 0xcc << 16 | buffer->stride;
> +	batch[i++] = 0;
> +	batch[i++] = buffer->height << 16 | buffer->width;
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = 0;
> +	pitch = buffer->stride;
> +	if (device->gen >= 4 && buffer->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if (buffer->tiling >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +	gem_close(device->fd, obj[2].handle);
> +
> +	gem_sync(device->fd, obj[0].handle);
> +	gem_close(device->fd, obj[0].handle);
> +}
> +
> +static void *download(const struct device *device,
> +		      const struct buffer *buffer,
> +		      enum mode mode)
> +{
> +	void *linear, *src;
> +
> +	igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
> +
> +	switch (mode) {
> +	case CPU:
> +		if (buffer->tiling) {
> +			if (buffer->caching && !device->llc) {
> +				mode = GTT;
> +				break;
> +			}
> +			if (device->gen < 3) {
> +				mode = GTT;
> +				break;
> +			}
Why do we need two separate ifs here ^? Both set the same mode.
> +
> +			blit_to_linear(device, buffer, linear);
> +			return linear;
> +		}
> +		break;
> +
> +	case PRW:
> +	case WC:
> +		if (!buffer->tiling)
> +			break;
> +
> +	default:
> +		mode = GTT;
> +		break;
> +	}
> +
> +	switch (mode) {
> +	case CPU:
> +		src = gem_mmap__cpu(device->fd, buffer->handle,
> +				    0, buffer->size,
> +				    PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_CPU, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case WC:
> +		src = gem_mmap__wc(device->fd, buffer->handle,
> +				   0, buffer->size,
> +				   PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_WC, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case GTT:
> +		src = gem_mmap__gtt(device->fd, buffer->handle,
> +				   buffer->size,
> +				   PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_GTT, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case PRW:
> +		gem_read(device->fd, buffer->handle, 0, linear, buffer->size);
> +		break;
> +	}
> +
> +	return linear;
> +}
> +
> +static bool buffer_check(const struct device *device,
> +			 const struct buffer *buffer,
> +			 enum mode mode)
> +{
> +	unsigned int num_errors = 0;
> +	uint32_t *linear;
> +
> +	linear = download(device, buffer, mode);
> +	igt_assert(linear);
> +
> +	for (int y = 0; y < buffer->height; y++) {
> +		const uint32_t *model = buffer->model + y * buffer->width;
> +		const uint32_t *row =
> +			linear + y * buffer->stride / sizeof(uint32_t);
> +
> +		for (int x = 0; x < buffer->width; x++) {
> +			if (row[x] != model[x] && num_errors++ < 5) {
> +				igt_warn("buffer handle=%d mismatch at (%d, %d): expected %08x, found %08x\n",
> +					 buffer->handle,
> +					 x, y, model[x], row[x]);
> +			}
> +		}
> +	}
> +
> +	free(linear);
> +
> +	return num_errors == 0;
> +}
> +
> +static void buffer_free(const struct device *device, struct buffer *buffer)
> +{
> +	igt_assert(buffer_check(device, buffer, GTT));
> +	gem_close(device->fd, buffer->handle);
> +	free(buffer);
> +}
> +
> +static void memcpy_blt(const void *src, void *dst,
> +		       uint32_t src_stride, uint32_t dst_stride,
> +		       uint16_t src_x, uint16_t src_y,
> +		       uint16_t dst_x, uint16_t dst_y,
> +		       uint16_t width, uint16_t height)
> +{
> +	const uint8_t *src_bytes;
> +	uint8_t *dst_bytes;
> +	int byte_width;
> +
> +	src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * 4;
> +	dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * 4;
> +
> +	byte_width = width * 4;
> +	if (byte_width == src_stride && byte_width == dst_stride) {
> +		byte_width *= height;
> +		height = 1;
> +	}
> +
> +	switch (byte_width) {
> +	case 4:
> +		do {
> +			*(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes;
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +
> +	case 8:
> +		do {
> +			*(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes;
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +	case 16:
> +		do {
> +			((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
> +			((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +
> +	default:
> +		do {
> +			memcpy(dst_bytes, src_bytes, byte_width);
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +	}
> +}
> +
> +static void
> +blit(const struct device *device,
> +     struct buffer *src, uint16_t src_x, uint16_t src_y,
> +     struct buffer *dst, uint16_t dst_x, uint16_t dst_y,
> +     uint16_t width, uint16_t height)
> +
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t *batch;
> +	uint32_t pitch;
> +	int i = 0;
> +
> +	if (src_x < 0) {
> +		width += src_x;
> +		dst_x -= src_x;
> +		src_x = 0;
> +	}
> +	if (src_y < 0) {
> +		height += src_y;
> +		dst_y  -= src_y;
> +		src_y = 0;
> +	}
> +
> +	if (dst_x < 0) {
> +		width += dst_x;
> +		src_x -= dst_x;
> +		dst_x = 0;
> +	}
> +	if (dst_y < 0) {
> +		height += dst_y;
> +		src_y  -= dst_y;
> +		dst_y = 0;
> +	}
> +
> +	if (src_x + width > src->width)
> +		width = src->width - src_x;
> +	if (dst_x + width > dst->width)
> +		width = dst->width - dst_x;
> +
> +	if (src_y + height > src->height)
> +		height = src->height - src_y;
> +	if (dst_y + height > dst->height)
> +		height = dst->height - dst_y;
> +
> +	if (dst->caching) {
> +		igt_assert(device->gen >= 3);
> +		igt_assert(device->llc || !src->caching);
> +	}
> +
How did you calculate these values above ^?

> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = dst->handle;
> +	obj[0].offset = dst->gtt_offset;
> +	if (dst->tiling)
> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	obj[1].handle = src->handle;
> +	obj[1].offset = src->gtt_offset;
> +	if (src->tiling)
> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	memset(reloc, 0, sizeof(reloc));
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(reloc);
> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +
> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (src->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		if (dst->tiling == I915_TILING_Y)
> +			mask |= BCS_DST_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	batch[i] |= 6 + 2 * has_64b_reloc;
> +	if (device->gen >= 4 && src->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +	if (device->gen >= 4 && dst->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
> +	i++;
> +
> +	pitch = dst->stride;
> +	if (device->gen >= 4 && dst->tiling)
> +		pitch /= 4;
> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
> +
> +	batch[i++] = dst_y << 16 | dst_x;
> +	batch[i++] = (height + dst_y) << 16 | (width + dst_x);
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = src_y << 16 | src_x;
> +	pitch = src->stride;
> +	if (device->gen >= 4 && src->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +	gem_close(device->fd, obj[2].handle);
> +
> +	dst->gtt_offset = obj[0].offset;
> +	src->gtt_offset = obj[1].offset;
> +
> +	memcpy_blt(src->model, dst->model,
> +		   4u * src->width, 4u * dst->width,
> +		   src_x, src_y,
> +		   dst_x, dst_y,
> +		   width, height);
> +}
> +
> +igt_main
> +{
> +	struct device device;
> +
> +	igt_fixture {
> +		device.fd = drm_open_driver_render(DRIVER_INTEL);
> +		igt_require_gem(device.fd);
> +
> +		device.pciid = intel_get_drm_devid(device.fd);
> +		device.gen = intel_gen(device.pciid);
> +		device.llc = gem_has_llc(device.fd);
> +	}
> +
> +	igt_subtest("basic") {
> +		struct buffer *src, *dst;
> +		unsigned int x, y;
> +
> +		for (unsigned int height = 1; height <= 16; height <<= 1) {
> +			for (unsigned int width = 1; width <= 64; width <<= 1) {
> +				src = buffer_create(&device,
> +						    width * 16, height * 4);
> +				dst = buffer_create(&device,
> +						    width * 16, height * 4);
> +
> +				y = 0;
> +				for (unsigned int src_tiling = I915_TILING_NONE;
> +				     src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> +				     src_tiling++) {
> +					buffer_set_tiling(&device, src, src_tiling);
> +
> +					x = 0;
> +					for (unsigned int dst_tiling = I915_TILING_NONE;
> +					     dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> +					     dst_tiling++) {
> +						buffer_set_tiling(&device, dst, dst_tiling);
> +
> +						for (enum mode down = CPU; down <= WC; down++) {
> +							igt_debug("Testing src_tiling=%d, dst_tiling=%d, down=%d at (%d, %d) x (%d, %d)\n",
> +								  src_tiling,
> +								  dst_tiling,
> +								  down, x, y,
> +								  width, height);
> +
> +							igt_assert(x + width <= dst->width);
> +							igt_assert(y + height <= dst->height);
> +
> +							blit(&device,
> +							     src, x, y,
> +							     dst, x, y,
> +							     width, height);
> +							igt_assert(buffer_check(&device, dst, down));
> +
> +							x += width;
> +						}
> +					}
> +
> +					y += height;
> +				}
> +
> +				buffer_free(&device, dst);
> +				buffer_free(&device, src);
> +			}
> +		}
> +	}
> +}

The whole test, with all its helper functions, does not seem clear enough.
I would prefer to have more comments explaining what is going on in the code,
especially when creating the batches.

General question - does this test give additional coverage or will it be
a substitute for some existing one?

Kasia :)

> diff --git a/tests/meson.build b/tests/meson.build
> index 32c2156c6..becadfa8d 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -18,6 +18,7 @@ test_progs = [
>  	'drv_suspend',
>  	'gem_bad_reloc',
>  	'gem_basic',
> +	'gem_blits',
>  	'gem_busy',
>  	'gem_caching',
>  	'gem_close',
> -- 
> 2.18.0
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2018-08-01 14:47 ` Katarzyna Dec
@ 2018-08-01 15:10   ` Chris Wilson
  2018-08-02 11:05     ` Katarzyna Dec
  0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2018-08-01 15:10 UTC (permalink / raw)
  To: Katarzyna Dec; +Cc: igt-dev

Quoting Katarzyna Dec (2018-08-01 15:47:39)
> On Wed, Jul 25, 2018 at 10:38:23PM +0100, Chris Wilson wrote:
> > +static unsigned int
> > +get_tiling_stride(const struct device *device,
> > +               unsigned int width, unsigned int tiling)
> > +{
> > +     unsigned int stride = 4u * width;
> > +
> > +     if (tiling) {
> > +             if (device->gen < 3)
> > +                     stride = ALIGN(stride, 128);
> > +             else if (device->gen < 4 || tiling == I915_TILING_X)
> > +                     stride = ALIGN(stride, 512);
> > +             else
> > +                     stride = ALIGN(stride, 128);
> > +             if (device->gen < 4)
> > +                     stride = 1 << fls(stride - 1);
> Shouldn't 'else' be last here ^? What about order of 'else if' and 'if'
> at the end?

First align to the stride, then round up to a pot for old gen. It can be
done either way around; the rule is just that it has to be both a multiple
of tile_width and a power of two.

> > +     } else {
> > +             if (device->gen >= 8)
> > +                     stride = ALIGN(stride, 64);

This is one that is more dubious, as it is papering over a hw bump that is
a bit more subtle. After all, part of the reason for this test is to detect
errors like that.

> > +     if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> > +             unsigned int mask;
> > +
> > +             batch[i++] = MI_LOAD_REGISTER_IMM;
> > +             batch[i++] = BCS_SWCTRL;
> > +
> 
> All batch creation starting from here ^ looks mysterious. Maybe we can make
> the buffer_set_tiling and buffer_linear functions clearer?
> It is hard to read and understand.

About the only thing I might touch is the lri. But I am not that
convinced it's worth it in the grand scheme.

> > +static void *download(const struct device *device,
> > +                   const struct buffer *buffer,
> > +                   enum mode mode)
> > +{
> > +     void *linear, *src;
> > +
> > +     igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
> > +
> > +     switch (mode) {
> > +     case CPU:
> > +             if (buffer->tiling) {
> > +                     if (buffer->caching && !device->llc) {
> > +                             mode = GTT;
> > +                             break;
> > +                     }
> > +                     if (device->gen < 3) {
> > +                             mode = GTT;
> > +                             break;
> > +                     }
> Why do we need 2 if-s here ^? Both are setting the same mode.

Semantics, they are for different reasons, but related reasons.
Not allowed to copy between snooped surfaces; and gen2 doesn't allow
copy to snoop, just copy from snoop.

> > +     if (dst->caching) {
> > +             igt_assert(device->gen >= 3);
> > +             igt_assert(device->llc || !src->caching);
> > +     }
> > +
> How did you calculated this values above ^ ?

^^

> Whole test with all helper functions seems to be not clear enough. 
> I would prefer to have more comments of what is going on in the code,
> especially in creating batches.

There isn't much going on, no tricks, just straightforward XY_SRC_COPY.
 
> General question - does this test gives additional coverge or will be
> a substitue for some existing?

Both more and less at the moment. There's better testing for the copy
and access patterns, but less around the limits of the GTT. But atm we
only do the sanitychecks in CI, so from that perspective this is much
more complete testing of things like pixel/cacheline/page access between
tiling modes.

The challenge I have in mind is how to let it explore by itself. A small
bytecode to try out different combinations of state is one way, but that
only covers the primitives we have modelled and therein lies the rub.
How to go completely crazy yet still detect error (other than oopses).
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2018-08-01 15:10   ` Chris Wilson
@ 2018-08-02 11:05     ` Katarzyna Dec
  0 siblings, 0 replies; 11+ messages in thread
From: Katarzyna Dec @ 2018-08-02 11:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

On Wed, Aug 01, 2018 at 04:10:07PM +0100, Chris Wilson wrote:
> Quoting Katarzyna Dec (2018-08-01 15:47:39)
> > On Wed, Jul 25, 2018 at 10:38:23PM +0100, Chris Wilson wrote:
> > > +static unsigned int
> > > +get_tiling_stride(const struct device *device,
> > > +               unsigned int width, unsigned int tiling)
> > > +{
> > > +     unsigned int stride = 4u * width;
> > > +
> > > +     if (tiling) {
                    /* align to the stride */
> > > +             if (device->gen < 3)
> > > +                     stride = ALIGN(stride, 128);
> > > +             else if (device->gen < 4 || tiling == I915_TILING_X)
> > > +                     stride = ALIGN(stride, 512);
> > > +             else
> > > +                     stride = ALIGN(stride, 128);
                    /* round up to a pot for old gen */
> > > +             if (device->gen < 4)
> > > +                     stride = 1 << fls(stride - 1);
> > Shouldn't 'else' be last here ^? What about order of 'else if' and 'if'
> > at the end?
> 
> First align to the stride, then round up to a pot for old gen. It can be
> done either away around; the rule is just it has to be both a multiple
> of tile_width and a power of two.
>
Now I understand. Maybe it would be worth adding a comment about that
in the code? It would be much clearer for everybody.
> > > +     } else {
> > > +             if (device->gen >= 8)
> > > +                     stride = ALIGN(stride, 64);
> 
> This is one that is more dubious as it papering over a hw bump that is a
> bit more subtle. After all part of the reason for this test is to detect
> errors like that.
> 
> > > +     if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> > > +             unsigned int mask;
> > > +
> > > +             batch[i++] = MI_LOAD_REGISTER_IMM;
> > > +             batch[i++] = BCS_SWCTRL;
> > > +
> > 
> > All batch creation staring from here ^ looks misterious. Maybe we can make
> > buffer_set_tiling and buffer_linear functions more clearer?
> > It is hard do read and understand.
> 
> About the only thing I might touch is the lri. But I am not that
> convinced it's worth it in the grand scheme.
>
Some documentation is better than no documentation. I know that code should be
its own judge, but you need a deep understanding of the driver code to get
what is going on with creating these batches.
And by docs I do not mean every detail in a super formatted way, just small
inline comments.

> > > +static void *download(const struct device *device,
> > > +                   const struct buffer *buffer,
> > > +                   enum mode mode)
> > > +{
> > > +     void *linear, *src;
> > > +
> > > +     igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
> > > +
> > > +     switch (mode) {
> > > +     case CPU:
> > > +             if (buffer->tiling) {
> > > +                     if (buffer->caching && !device->llc) {
> > > +                             mode = GTT;
> > > +                             break;
> > > +                     }
> > > +                     if (device->gen < 3) {
> > > +                             mode = GTT;
> > > +                             break;
> > > +                     }
> > Why do we need 2 if-s here ^? Both are setting the same mode.
> 
> Semantics, they are for different reasons, but related reasons.
> Not allowed to copy between snooped surfaces; and gen2 doesn't allow
> copy to snoop, just copy from snoop.
Same as above, it would be nice to have even a one-line comment here.
> 
> > > +     if (dst->caching) {
> > > +             igt_assert(device->gen >= 3);
> > > +             igt_assert(device->llc || !src->caching);
> > > +     }
> > > +
> > How did you calculated this values above ^ ?
> 
> ^^
> 
> > Whole test with all helper functions seems to be not clear enough. 
> > I would prefer to have more comments of what is going on in the code,
> > especially in creating batches.
> 
> There isn't much going on, no tricks, just straight forward XY_SRC_COPY.
>
It would be nice if you introduced some one-line comments in a few places. Maybe
the code is simple, but it doesn't look like that.

Kasia :)
> > General question - does this test gives additional coverge or will be
> > a substitue for some existing?
> 
> Both more and less at the moment. There's better testing for the copy
> and access patterns, but less around the limits of the GTT. But atm we
> only do the sanitychecks in CI, so from that perspective this is much
> more complete testing of things like pixel/cacheline/page access between
> tiling modes.
> 
> The challenge I have in mind is how to let it explore by itself. A small
> bytecode to try out different combinations of state is one way, but that
> only covers the primitives we have modelled and therein lies the rub.
> How to go completely crazy yet still detect error (other than oopses).
> -Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
@ 2019-11-05 11:15 Chris Wilson
  2019-11-05 11:51 ` [igt-dev] ✗ GitLab.Pipeline: warning for igt: Another combinatorial exercise for blits (rev2) Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Chris Wilson @ 2019-11-05 11:15 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

The aim of this test is to combine gem_linear_blits, gem_tiled_blits etc
into one test runner that covers investigation into HW alignment issues
as well as driver boundaries (relocs, access, thrashing).

See also gem_concurrent_blits which looks for coherency issues between
parallel execution.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Vanshidhar Konda <vanshidhar.r.konda@intel.com>
---
 tests/Makefile.sources |   1 +
 tests/i915/gem_blits.c | 796 +++++++++++++++++++++++++++++++++++++++++
 tests/meson.build      |   1 +
 3 files changed, 798 insertions(+)
 create mode 100644 tests/i915/gem_blits.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 093eb57f3..f63602ef0 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -24,6 +24,7 @@ TESTS_progs = \
 	drm_import_export \
 	drm_mm \
 	drm_read \
+	i915/gem_blits \
 	kms_3d \
 	kms_addfb_basic \
 	kms_atomic \
diff --git a/tests/i915/gem_blits.c b/tests/i915/gem_blits.c
new file mode 100644
index 000000000..e6a840ffb
--- /dev/null
+++ b/tests/i915/gem_blits.c
@@ -0,0 +1,796 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_x86.h"
+
+#define MI_FLUSH_DW (0x26 << 23)
+
+#define BCS_SWCTRL 0x22200
+#define BCS_SRC_Y (1 << 0)
+#define BCS_DST_Y (1 << 1)
+
+struct device {
+	int fd;
+	int gen;
+	int pciid;
+	int llc;
+};
+
+struct buffer {
+	uint32_t handle;
+	uint16_t width;
+	uint16_t height;
+	uint16_t stride;
+	uint32_t size;
+	unsigned int tiling;
+	unsigned int caching;
+	uint64_t gtt_offset;
+	uint32_t model[] __attribute__((aligned(16)));
+};
+
+enum mode {
+	CPU,
+	PRW,
+	GTT,
+	WC,
+};
+
+static int fls(uint64_t x)
+{
+	int t;
+
+	for (t = 0; x >> t; t++)
+		;
+
+	return t;
+}
+
+static unsigned int
+get_tiling_stride(const struct device *device,
+		  unsigned int width, unsigned int tiling)
+{
+	unsigned int stride = 4u * width;
+
+	if (tiling) {
+		if (device->gen < 3)
+			stride = ALIGN(stride, 128);
+		else if (device->gen < 4 || tiling == I915_TILING_X)
+			stride = ALIGN(stride, 512);
+		else
+			stride = ALIGN(stride, 128);
+		if (device->gen < 4)
+			stride = 1 << fls(stride - 1);
+	} else {
+		if (device->gen >= 8)
+			stride = ALIGN(stride, 64);
+	}
+
+	igt_assert(stride < UINT16_MAX && stride >= 4*width);
+	return stride;
+}
+
+static unsigned int
+get_tiling_height(const struct device *device,
+		  unsigned int height, unsigned int tiling)
+{
+	if (!tiling)
+		return height;
+
+	if (device->gen < 3)
+		return ALIGN(height, 16);
+	else if (device->gen < 4 || tiling == I915_TILING_X)
+		return ALIGN(height, 8);
+	else
+		return ALIGN(height, 32);
+}
+
+static struct buffer *buffer_create(const struct device *device,
+				    unsigned int width,
+				    unsigned int height)
+{
+	struct buffer *buffer;
+
+	igt_assert(width && height);
+
+	buffer = malloc(sizeof(*buffer) + 4u * width * height);
+	if (!buffer)
+		return NULL;
+
+	buffer->width = width;
+	buffer->height = height;
+
+	buffer->tiling = I915_TILING_NONE;
+	buffer->stride = get_tiling_stride(device, width, I915_TILING_NONE);
+	buffer->size = ALIGN(buffer->stride * height, 4096);
+	buffer->handle = gem_create(device->fd, buffer->size);
+	buffer->caching = device->llc;
+
+	buffer->gtt_offset = buffer->handle * buffer->size;
+
+	for (int y = 0; y < height; y++) {
+		uint32_t *row = buffer->model + y * width;
+
+		for (int x = 0; x < width; x++)
+			row[x] = (y << 16 | x) ^ buffer->handle;
+
+		gem_write(device->fd,
+			  buffer->handle, 4u * y * width,
+			  row, 4u * width);
+	}
+
+	return buffer;
+}
+
+static void buffer_set_tiling(const struct device *device,
+			      struct buffer *buffer,
+			      unsigned int tiling)
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t stride, size, pitch;
+	uint32_t *batch;
+	int i;
+
+	if (buffer->tiling == tiling)
+		return;
+
+	stride = get_tiling_stride(device, buffer->width, tiling);
+	size = stride * get_tiling_height(device, buffer->height, tiling);
+	size = ALIGN(size, 4096);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(device->fd, size);
+	if (tiling) {
+		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+		gem_set_tiling(device->fd, obj[0].handle, tiling, stride);
+	}
+
+	obj[1].handle = buffer->handle;
+	obj[1].offset = buffer->gtt_offset;
+	if (buffer->tiling)
+		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(memset(reloc, 0, sizeof(reloc)));
+	obj[2].relocation_count = 2;
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	i = 0;
+
+	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (buffer->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		if (tiling == I915_TILING_Y)
+			mask |= BCS_DST_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	if (device->gen >= 4 && buffer->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+	if (device->gen >= 4 && tiling)
+		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
+	batch[i++] |= 6 + 2 * has_64b_reloc;
+
+	pitch = stride;
+	if (device->gen >= 4 && tiling)
+		pitch /= 4;
+	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
+	batch[i++] = 0;
+	batch[i++] = buffer->height << 16 | buffer->width;
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = 0;
+	pitch = buffer->stride;
+	if (device->gen >= 4 && buffer->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+
+	gem_close(device->fd, obj[2].handle);
+	gem_close(device->fd, obj[1].handle);
+
+	buffer->gtt_offset = obj[0].offset;
+	buffer->handle = obj[0].handle;
+
+	buffer->tiling = tiling;
+	buffer->stride = stride;
+	buffer->size = size;
+}
+
+static bool can_blit_to_linear(const struct device *device,
+			       const struct buffer *buffer)
+{
+	if (buffer->caching && !device->llc)
+		return false;
+
+	if (device->gen < 3)
+		return false;
+
+	return true;
+}
+
+static bool blit_to_linear(const struct device *device,
+			   const struct buffer *buffer,
+			   void *linear)
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t *batch;
+	uint32_t pitch;
+	int i = 0;
+
+	igt_assert(buffer->tiling);
+
+	if (!can_blit_to_linear(device, buffer))
+		return false;
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	memset(obj, 0, sizeof(obj));
+	if (__gem_userptr(device->fd, linear, buffer->size, 0, 0, &obj[0].handle))
+		return false;
+
+	obj[1].handle = buffer->handle;
+	obj[1].offset = buffer->gtt_offset;
+	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	memset(reloc, 0, sizeof(reloc));
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(reloc);
+	obj[2].relocation_count = ARRAY_SIZE(reloc);
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	if (buffer->tiling >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (buffer->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	if (device->gen >= 4 && buffer->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+	batch[i++] |= 6 + 2 * has_64b_reloc;
+
+	batch[i++] = 3 << 24 | 0xcc << 16 | buffer->stride;
+	batch[i++] = 0;
+	batch[i++] = buffer->height << 16 | buffer->width;
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = 0;
+	pitch = buffer->stride;
+	if (device->gen >= 4 && buffer->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	if (buffer->tiling >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+	gem_close(device->fd, obj[2].handle);
+
+	gem_sync(device->fd, obj[0].handle);
+	gem_close(device->fd, obj[0].handle);
+
+	return true;
+}
+
+static void *download(const struct device *device,
+		      const struct buffer *buffer,
+		      enum mode mode)
+{
+	void *linear, *src;
+
+	igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
+
+	switch (mode) {
+	case CPU:
+		if (buffer->tiling) {
+			if (blit_to_linear(device, buffer, linear))
+				return linear;
+
+			mode = GTT;
+		}
+		break;
+
+	case WC:
+		if (!gem_mmap__has_wc(device->fd) || buffer->tiling)
+			mode = GTT;
+		break;
+
+	case PRW:
+		if (buffer->tiling)
+			mode = GTT;
+		break;
+
+	case GTT:
+		break;
+	}
+
+	switch (mode) {
+	case CPU:
+		src = gem_mmap__cpu(device->fd, buffer->handle,
+				    0, buffer->size,
+				    PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_CPU, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case WC:
+		src = gem_mmap__wc(device->fd, buffer->handle,
+				   0, buffer->size,
+				   PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_WC, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case GTT:
+		src = gem_mmap__gtt(device->fd, buffer->handle,
+				   buffer->size,
+				   PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_GTT, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case PRW:
+		gem_read(device->fd, buffer->handle, 0, linear, buffer->size);
+		break;
+	}
+
+	return linear;
+}
+
+static bool buffer_check(const struct device *device,
+			 const struct buffer *buffer,
+			 enum mode mode)
+{
+	unsigned int num_errors = 0;
+	uint32_t *linear;
+
+	linear = download(device, buffer, mode);
+	igt_assert(linear);
+
+	for (int y = 0; y < buffer->height; y++) {
+		const uint32_t *model = buffer->model + y * buffer->width;
+		const uint32_t *row =
+			linear + y * buffer->stride / sizeof(uint32_t);
+
+		if (!memcmp(model, row, buffer->width * sizeof(uint32_t)))
+			continue;
+
+		for (int x = 0; x < buffer->width; x++) {
+			if (row[x] != model[x] && num_errors++ < 5) {
+				igt_warn("buffer handle=%d mismatch at (%d, %d): expected %08x, found %08x\n",
+					 buffer->handle,
+					 x, y, model[x], row[x]);
+			}
+		}
+	}
+
+	free(linear);
+
+	return num_errors == 0;
+}
+
+static void buffer_free(const struct device *device, struct buffer *buffer)
+{
+	igt_assert(buffer_check(device, buffer, GTT));
+	gem_close(device->fd, buffer->handle);
+	free(buffer);
+}
+
+static void memcpy_blt(const void *src, void *dst,
+		       uint32_t src_stride, uint32_t dst_stride,
+		       uint16_t src_x, uint16_t src_y,
+		       uint16_t dst_x, uint16_t dst_y,
+		       uint16_t width, uint16_t height)
+{
+	const uint8_t *src_bytes;
+	uint8_t *dst_bytes;
+	int byte_width;
+
+	src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * 4;
+	dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * 4;
+
+	byte_width = width * 4;
+	if (byte_width == src_stride && byte_width == dst_stride) {
+		byte_width *= height;
+		height = 1;
+	}
+
+	switch (byte_width) {
+	case 4:
+		do {
+			*(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes;
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+
+	case 8:
+		do {
+			*(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes;
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+	case 16:
+		do {
+			((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
+			((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+
+	default:
+		do {
+			memcpy(dst_bytes, src_bytes, byte_width);
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+	}
+}
+
+static void
+blit(const struct device *device,
+     struct buffer *src, uint16_t src_x, uint16_t src_y,
+     struct buffer *dst, uint16_t dst_x, uint16_t dst_y,
+     uint16_t width, uint16_t height)
+
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t *batch;
+	uint32_t pitch;
+	int i = 0;
+
+	if (src_x < 0) {
+		width += src_x;
+		dst_x -= src_x;
+		src_x  = 0;
+	}
+	if (src_y < 0) {
+		height += src_y;
+		dst_y  -= src_y;
+		src_y   = 0;
+	}
+
+	if (dst_x < 0) {
+		width += dst_x;
+		src_x -= dst_x;
+		dst_x  = 0;
+	}
+	if (dst_y < 0) {
+		height += dst_y;
+		src_y  -= dst_y;
+		dst_y   = 0;
+	}
+
+	if (src_x + width > src->width)
+		width = src->width - src_x;
+	if (dst_x + width > dst->width)
+		width = dst->width - dst_x;
+
+	if (src_y + height > src->height)
+		height = src->height - src_y;
+	if (dst_y + height > dst->height)
+		height = dst->height - dst_y;
+
+	if (dst->caching) {
+		igt_assert(device->gen >= 3);
+		igt_assert(device->llc || !src->caching);
+	}
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = dst->handle;
+	obj[0].offset = dst->gtt_offset;
+	if (dst->tiling)
+		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	obj[1].handle = src->handle;
+	obj[1].offset = src->gtt_offset;
+	if (src->tiling)
+		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	memset(reloc, 0, sizeof(reloc));
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(reloc);
+	obj[2].relocation_count = ARRAY_SIZE(reloc);
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (src->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		if (dst->tiling == I915_TILING_Y)
+			mask |= BCS_DST_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	if (device->gen >= 4 && src->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+	if (device->gen >= 4 && dst->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
+	batch[i++] |= 6 + 2 * has_64b_reloc;
+
+	pitch = dst->stride;
+	if (device->gen >= 4 && dst->tiling)
+		pitch /= 4;
+	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
+
+	batch[i++] = dst_y << 16 | dst_x;
+	batch[i++] = (height + dst_y) << 16 | (width + dst_x);
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = src_y << 16 | src_x;
+	pitch = src->stride;
+	if (device->gen >= 4 && src->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+	gem_close(device->fd, obj[2].handle);
+
+	dst->gtt_offset = obj[0].offset;
+	src->gtt_offset = obj[1].offset;
+
+	memcpy_blt(src->model, dst->model,
+		   4u * src->width, 4u * dst->width,
+		   src_x, src_y,
+		   dst_x, dst_y,
+		   width, height);
+}
+
+enum start {
+	ZERO,
+	ABOVE,
+	BELOW
+};
+
+static int start_at(int x, enum start s)
+{
+	switch (s) {
+	default:
+	case ZERO:
+		return 0;
+	case ABOVE:
+		return 1;
+	case BELOW:
+		return x - 1;
+	}
+}
+
+igt_main
+{
+	struct device device;
+
+	igt_fixture {
+		device.fd = drm_open_driver_render(DRIVER_INTEL);
+		igt_require_gem(device.fd);
+
+		device.pciid = intel_get_drm_devid(device.fd);
+		device.gen = intel_gen(device.pciid);
+		device.llc = gem_has_llc(device.fd);
+	}
+
+	igt_subtest("basic") {
+		struct buffer *src, *dst;
+		unsigned int x, y;
+
+		for (unsigned int height = 1; height <= 16; height <<= 1) {
+			for (unsigned int y0 = ZERO; y0 <= (height > 2 ? BELOW : ZERO); y0++) {
+				for (unsigned int width = 1; width <= 64; width <<= 1) {
+					for (unsigned int x0 = ZERO; x0 <= (width > 2 ? BELOW : ZERO); x0++) {
+
+						src = buffer_create(&device,
+								    width * 16, height * 4);
+						dst = buffer_create(&device,
+								    width * 16, height * 4);
+
+						y = start_at(height, y0);
+						for (unsigned int src_tiling = I915_TILING_NONE;
+						     src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
+						     src_tiling++) {
+							buffer_set_tiling(&device, src, src_tiling);
+
+							x = start_at(width, x0);
+							for (unsigned int dst_tiling = I915_TILING_NONE;
+							     dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
+							     dst_tiling++) {
+								buffer_set_tiling(&device, dst, dst_tiling);
+
+								for (enum mode down = CPU; down <= WC; down++) {
+									igt_debug("Testing src_tiling=%d, dst_tiling=%d, down=%d at (%d, %d) x (%d, %d)\n",
+										  src_tiling,
+										  dst_tiling,
+										  down, x, y,
+										  width, height);
+
+									igt_assert(x + width <= dst->width);
+									igt_assert(y + height <= dst->height);
+
+									blit(&device,
+									     src, x, y,
+									     dst, x, y,
+									     width, height);
+									igt_assert(buffer_check(&device, dst, down));
+
+									x += width;
+								}
+							}
+
+							y += height;
+						}
+
+						buffer_free(&device, dst);
+						buffer_free(&device, src);
+					}
+				}
+			}
+		}
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 3f3eee277..98f2db555 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -106,6 +106,7 @@ i915_progs = [
 	'gen3_render_tiledy_blits',
 	'gem_bad_reloc',
 	'gem_basic',
+	'gem_blits',
 	'gem_busy',
 	'gem_caching',
 	'gem_close',
-- 
2.24.0

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [igt-dev] ✗ GitLab.Pipeline: warning for igt: Another combinatorial exercise for blits (rev2)
  2019-11-05 11:15 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
@ 2019-11-05 11:51 ` Patchwork
  2019-11-05 12:04 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2019-11-05 11:51 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: igt: Another combinatorial exercise for blits (rev2)
URL   : https://patchwork.freedesktop.org/series/47249/
State : warning

== Summary ==

Did not get list of undocumented tests for this run, something is wrong!

Other than that, pipeline status: FAILED.

see https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/pipelines/76303 for more details

== Logs ==

For more details see: https://gitlab.freedesktop.org/gfx-ci/igt-ci-tags/pipelines/76303
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for igt: Another combinatorial exercise for blits (rev2)
  2019-11-05 11:15 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
  2019-11-05 11:51 ` [igt-dev] ✗ GitLab.Pipeline: warning for igt: Another combinatorial exercise for blits (rev2) Patchwork
@ 2019-11-05 12:04 ` Patchwork
  2019-11-05 13:25 ` [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Katarzyna Dec
  2019-11-05 21:48 ` [igt-dev] ✗ Fi.CI.IGT: failure for igt: Another combinatorial exercise for blits (rev2) Patchwork
  3 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2019-11-05 12:04 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: igt: Another combinatorial exercise for blits (rev2)
URL   : https://patchwork.freedesktop.org/series/47249/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_7261 -> IGTPW_3651
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/index.html

Known issues
------------

  Here are the changes found in IGTPW_3651 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_flink_basic@bad-flink:
    - fi-icl-u3:          [PASS][1] -> [DMESG-WARN][2] ([fdo#107724])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/fi-icl-u3/igt@gem_flink_basic@bad-flink.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/fi-icl-u3/igt@gem_flink_basic@bad-flink.html

  * igt@i915_selftest@live_gem_contexts:
    - fi-bsw-nick:        [PASS][3] -> [INCOMPLETE][4] ([fdo#111542])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/fi-bsw-nick/igt@i915_selftest@live_gem_contexts.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/fi-bsw-nick/igt@i915_selftest@live_gem_contexts.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-icl-u2:          [PASS][5] -> [FAIL][6] ([fdo#109483])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/fi-icl-u2/igt@kms_chamelium@hdmi-hpd-fast.html

  
#### Possible fixes ####

  * igt@gem_close_race@basic-process:
    - fi-icl-u3:          [DMESG-WARN][7] ([fdo#107724]) -> [PASS][8] +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/fi-icl-u3/igt@gem_close_race@basic-process.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/fi-icl-u3/igt@gem_close_race@basic-process.html

  
  [fdo#111542]: https://bugs.freedesktop.org/show_bug.cgi?id=111542
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#109483]: https://bugs.freedesktop.org/show_bug.cgi?id=109483


Participating hosts (51 -> 42)
------------------------------

  Additional (1): fi-tgl-u 
  Missing    (10): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-bwr-2160 fi-ctg-p8600 fi-gdg-551 fi-bdw-samus fi-byt-clapper fi-skl-6600u 


Build changes
-------------

  * CI: CI-20190529 -> None
  * IGT: IGT_5262 -> IGTPW_3651

  CI-20190529: 20190529
  CI_DRM_7261: 0df0545b3693fbc84f91af17499f204437e7ac6a @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_3651: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/index.html
  IGT_5262: 2d2fd6ce47b2eac83732c5c88cd0d7e5f13013a2 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_blits@basic

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2019-11-05 11:15 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
  2019-11-05 11:51 ` [igt-dev] ✗ GitLab.Pipeline: warning for igt: Another combinatorial exercise for blits (rev2) Patchwork
  2019-11-05 12:04 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
@ 2019-11-05 13:25 ` Katarzyna Dec
  2019-11-05 23:50   ` Vanshidhar Konda
  2019-11-05 21:48 ` [igt-dev] ✗ Fi.CI.IGT: failure for igt: Another combinatorial exercise for blits (rev2) Patchwork
  3 siblings, 1 reply; 11+ messages in thread
From: Katarzyna Dec @ 2019-11-05 13:25 UTC (permalink / raw)
  To: Chris Wilson, igt-dev

On Tue, Nov 05, 2019 at 11:15:13AM +0000, Chris Wilson wrote:
> The aim of this test is to combine gem_linear_blits, gem_tiled_blits etc
> into one test runner that covers investigation into HW alignment issues
> as well as driver boundaries (relocs, access, thrashing).
> 
> See also gem_concurrent_blits which looks for coherency issues between
> parallel execution.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Katarzyna Dec <katarzyna.dec@intel.com>
> Cc: Vanshidhar Konda <vanshidhar.r.konda@intel.com>
> ---
>  tests/Makefile.sources |   1 +
>  tests/i915/gem_blits.c | 796 +++++++++++++++++++++++++++++++++++++++++
>  tests/meson.build      |   1 +
>  3 files changed, 798 insertions(+)
>  create mode 100644 tests/i915/gem_blits.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 093eb57f3..f63602ef0 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -24,6 +24,7 @@ TESTS_progs = \
>  	drm_import_export \
>  	drm_mm \
>  	drm_read \
> +	i915/gem_blits \
>  	kms_3d \
>  	kms_addfb_basic \
>  	kms_atomic \
> diff --git a/tests/i915/gem_blits.c b/tests/i915/gem_blits.c
> new file mode 100644
> index 000000000..e6a840ffb
> --- /dev/null
> +++ b/tests/i915/gem_blits.c
> @@ -0,0 +1,796 @@
> +/*
> + * Copyright © 2018 Intel Corporation
Minor typo, but as this is a new file, it would be good to update the date :)
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +#include "igt_x86.h"
> +
> +#define MI_FLUSH_DW (0x26 << 23)
> +
> +#define BCS_SWCTRL 0x22200
> +#define BCS_SRC_Y (1 << 0)
> +#define BCS_DST_Y (1 << 1)
> +
> +struct device {
> +	int fd;
> +	int gen;
> +	int pciid;
> +	int llc;
> +};
> +
> +struct buffer {
> +	uint32_t handle;
> +	uint16_t width;
> +	uint16_t height;
> +	uint16_t stride;
> +	uint32_t size;
> +	unsigned int tiling;
> +	unsigned int caching;
> +	uint64_t gtt_offset;
> +	uint32_t model[] __attribute__((aligned(16)));
> +};
> +
> +enum mode {
> +	CPU,
> +	PRW,
> +	GTT,
> +	WC,
> +};
> +
> +static int fls(uint64_t x)
> +{
> +	int t;
> +
> +	for (t = 0; x >> t; t++)
> +		;
> +
> +	return t;
> +}
> +
> +static unsigned int
> +get_tiling_stride(const struct device *device,
> +		  unsigned int width, unsigned int tiling)
> +{
> +	unsigned int stride = 4u * width;
> +
> +	if (tiling) {
> +		if (device->gen < 3)
> +			stride = ALIGN(stride, 128);
> +		else if (device->gen < 4 || tiling == I915_TILING_X)
> +			stride = ALIGN(stride, 512);
> +		else
> +			stride = ALIGN(stride, 128);
> +		if (device->gen < 4)
> +			stride = 1 << fls(stride - 1);
> +	} else {
> +		if (device->gen >= 8)
> +			stride = ALIGN(stride, 64);
> +	}
> +
> +	igt_assert(stride < UINT16_MAX && stride >= 4*width);
> +	return stride;
> +}
> +
> +static unsigned int
> +get_tiling_height(const struct device *device,
> +		  unsigned int height, unsigned int tiling)
> +{
> +	if (!tiling)
> +		return height;
> +
> +	if (device->gen < 3)
> +		return ALIGN(height, 16);
> +	else if (device->gen < 4 || tiling == I915_TILING_X)
> +		return ALIGN(height, 8);
> +	else
> +		return ALIGN(height, 32);
> +}
> +
> +static struct buffer *buffer_create(const struct device *device,
> +				    unsigned int width,
> +				    unsigned int height)
> +{
> +	struct buffer *buffer;
> +
> +	igt_assert(width && height);
> +
> +	buffer = malloc(sizeof(*buffer) + 4u * width * height);
> +	if (!buffer)
> +		return NULL;
> +
> +	buffer->width = width;
> +	buffer->height = height;
> +
> +	buffer->tiling = I915_TILING_NONE;
> +	buffer->stride = get_tiling_stride(device, width, I915_TILING_NONE);
> +	buffer->size = ALIGN(buffer->stride * height, 4096);
> +	buffer->handle = gem_create(device->fd, buffer->size);
> +	buffer->caching = device->llc;
> +
> +	buffer->gtt_offset = buffer->handle * buffer->size;
> +
> +	for (int y = 0; y < height; y++) {
> +		uint32_t *row = buffer->model + y * width;
> +
> +		for (int x = 0; x < width; x++)
> +			row[x] = (y << 16 | x) ^ buffer->handle;
> +
> +		gem_write(device->fd,
> +			  buffer->handle, 4u * y * width,
> +			  row, 4u * width);
> +	}
> +
> +	return buffer;
> +}
> +
> +static void buffer_set_tiling(const struct device *device,
> +			      struct buffer *buffer,
> +			      unsigned int tiling)
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t stride, size, pitch;
> +	uint32_t *batch;
> +	int i;
> +
> +	if (buffer->tiling == tiling)
> +		return;
> +
> +	stride = get_tiling_stride(device, buffer->width, tiling);
> +	size = stride * get_tiling_height(device, buffer->height, tiling);
> +	size = ALIGN(size, 4096);
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(device->fd, size);
> +	if (tiling) {
> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
> +		gem_set_tiling(device->fd, obj[0].handle, tiling, stride);
> +	}
> +
> +	obj[1].handle = buffer->handle;
> +	obj[1].offset = buffer->gtt_offset;
> +	if (buffer->tiling)
> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(memset(reloc, 0, sizeof(reloc)));
> +	obj[2].relocation_count = 2;
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	i = 0;
> +
> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +
> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (buffer->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		if (tiling == I915_TILING_Y)
> +			mask |= BCS_DST_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	if (device->gen >= 4 && buffer->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +	if (device->gen >= 4 && tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
> +	batch[i++] |= 6 + 2 * has_64b_reloc;
> +
> +	pitch = stride;
> +	if (device->gen >= 4 && tiling)
> +		pitch /= 4;
> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
> +	batch[i++] = 0;
> +	batch[i++] = buffer->height << 16 | buffer->width;
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = 0;
> +	pitch = buffer->stride;
> +	if (device->gen >= 4 && buffer->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +
> +	gem_close(device->fd, obj[2].handle);
> +	gem_close(device->fd, obj[1].handle);
> +
> +	buffer->gtt_offset = obj[0].offset;
> +	buffer->handle = obj[0].handle;
> +
> +	buffer->tiling = tiling;
> +	buffer->stride = stride;
> +	buffer->size = size;
> +}
> +
> +static bool can_blit_to_linear(const struct device *device,
> +			       const struct buffer *buffer)
> +{
> +	if (buffer->caching && !device->llc)
> +		return false;
> +
> +	if (device->gen < 3)
> +		return false;
> +
> +	return true;
> +}
> +
> +static bool blit_to_linear(const struct device *device,
> +			   const struct buffer *buffer,
> +			   void *linear)
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t *batch;
> +	uint32_t pitch;
> +	int i = 0;
> +
> +	igt_assert(buffer->tiling);
> +
> +	if (!can_blit_to_linear(device, buffer))
> +		return false;
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	if (__gem_userptr(device->fd, linear, buffer->size, 0, 0, &obj[0].handle))
> +		return false;
> +
> +	obj[1].handle = buffer->handle;
> +	obj[1].offset = buffer->gtt_offset;
> +	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	memset(reloc, 0, sizeof(reloc));
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(reloc);
> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	if (buffer->tiling >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +
> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (buffer->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	if (device->gen >= 4 && buffer->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +	batch[i++] |= 6 + 2 * has_64b_reloc;
> +
> +	batch[i++] = 3 << 24 | 0xcc << 16 | buffer->stride;
> +	batch[i++] = 0;
> +	batch[i++] = buffer->height << 16 | buffer->width;
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = 0;
> +	pitch = buffer->stride;
> +	if (device->gen >= 4 && buffer->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if (buffer->tiling >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +	gem_close(device->fd, obj[2].handle);
> +
> +	gem_sync(device->fd, obj[0].handle);
> +	gem_close(device->fd, obj[0].handle);
> +
> +	return true;
> +}
> +
> +static void *download(const struct device *device,
> +		      const struct buffer *buffer,
> +		      enum mode mode)
> +{
> +	void *linear, *src;
> +
> +	igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
> +
> +	switch (mode) {
> +	case CPU:
> +		if (buffer->tiling) {
> +			if (blit_to_linear(device, buffer, linear))
> +				return linear;
> +
> +			mode = GTT;
> +		}
> +		break;
> +
> +	case WC:
> +		if (!gem_mmap__has_wc(device->fd) || buffer->tiling)
> +			mode = GTT;
> +		break;
> +
> +	case PRW:
> +		if (buffer->tiling)
> +			mode = GTT;
> +		break;
> +
> +	case GTT:
> +		break;
> +	}
> +
> +	switch (mode) {
> +	case CPU:
> +		src = gem_mmap__cpu(device->fd, buffer->handle,
> +				    0, buffer->size,
> +				    PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_CPU, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case WC:
> +		src = gem_mmap__wc(device->fd, buffer->handle,
> +				   0, buffer->size,
> +				   PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_WC, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case GTT:
> +		src = gem_mmap__gtt(device->fd, buffer->handle,
> +				   buffer->size,
> +				   PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_GTT, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case PRW:
> +		gem_read(device->fd, buffer->handle, 0, linear, buffer->size);
> +		break;
> +	}
> +
> +	return linear;
> +}
> +
> +static bool buffer_check(const struct device *device,
> +			 const struct buffer *buffer,
> +			 enum mode mode)
> +{
> +	unsigned int num_errors = 0;
> +	uint32_t *linear;
> +
> +	linear = download(device, buffer, mode);
> +	igt_assert(linear);
> +
> +	for (int y = 0; y < buffer->height; y++) {
> +		const uint32_t *model = buffer->model + y * buffer->width;
> +		const uint32_t *row =
> +			linear + y * buffer->stride / sizeof(uint32_t);
> +
> +		if (!memcmp(model, row, buffer->width * sizeof(uint32_t)))
> +			continue;
> +
> +		for (int x = 0; x < buffer->width; x++) {
> +			if (row[x] != model[x] && num_errors++ < 5) {
> +				igt_warn("buffer handle=%d mismatch at (%d, %d): expected %08x, found %08x\n",
> +					 buffer->handle,
> +					 x, y, model[x], row[x]);
> +			}
> +		}
> +	}
> +
> +	free(linear);
> +
> +	return num_errors == 0;
> +}
> +
> +static void buffer_free(const struct device *device, struct buffer *buffer)
> +{
> +	igt_assert(buffer_check(device, buffer, GTT));
> +	gem_close(device->fd, buffer->handle);
> +	free(buffer);
> +}
> +
> +static void memcpy_blt(const void *src, void *dst,
> +		       uint32_t src_stride, uint32_t dst_stride,
> +		       uint16_t src_x, uint16_t src_y,
> +		       uint16_t dst_x, uint16_t dst_y,
> +		       uint16_t width, uint16_t height)
> +{
> +	const uint8_t *src_bytes;
> +	uint8_t *dst_bytes;
> +	int byte_width;
> +
> +	src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * 4;
> +	dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * 4;
> +
> +	byte_width = width * 4;
> +	if (byte_width == src_stride && byte_width == dst_stride) {
> +		byte_width *= height;
> +		height = 1;
> +	}
> +
> +	switch (byte_width) {
> +	case 4:
> +		do {
> +			*(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes;
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +
> +	case 8:
> +		do {
> +			*(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes;
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +	case 16:
> +		do {
> +			((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
> +			((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +
> +	default:
> +		do {
> +			memcpy(dst_bytes, src_bytes, byte_width);
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +	}
> +}
> +
> +static void
> +blit(const struct device *device,
> +     struct buffer *src, uint16_t src_x, uint16_t src_y,
> +     struct buffer *dst, uint16_t dst_x, uint16_t dst_y,
> +     uint16_t width, uint16_t height)
> +
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t *batch;
> +	uint32_t pitch;
> +	int i = 0;
> +
> +	if (src_x < 0) {
> +		width += src_x;
> +		dst_x -= src_x;
> +		src_x  = 0;
> +	}
> +	if (src_y < 0) {
> +		height += src_y;
> +		dst_y  -= src_y;
> +		src_y   = 0;
> +	}
> +
> +	if (dst_x < 0) {
> +		width += dst_x;
> +		src_x -= dst_x;
> +		dst_x  = 0;
> +	}
> +	if (dst_y < 0) {
> +		height += dst_y;
> +		src_y  -= dst_y;
> +		dst_y   = 0;
> +	}
> +
> +	if (src_x + width > src->width)
> +		width = src->width - src_x;
> +	if (dst_x + width > dst->width)
> +		width = dst->width - dst_x;
> +
> +	if (src_y + height > src->height)
> +		height = src->height - src_y;
> +	if (dst_y + height > dst->height)
> +		height = dst->height - dst_y;
> +
> +	if (dst->caching) {
> +		igt_assert(device->gen >= 3);
> +		igt_assert(device->llc || !src->caching);
> +	}
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = dst->handle;
> +	obj[0].offset = dst->gtt_offset;
> +	if (dst->tiling)
> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	obj[1].handle = src->handle;
> +	obj[1].offset = src->gtt_offset;
> +	if (src->tiling)
> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	memset(reloc, 0, sizeof(reloc));
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(reloc);
> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +
> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (src->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		if (dst->tiling == I915_TILING_Y)
> +			mask |= BCS_DST_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	if (device->gen >= 4 && src->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +	if (device->gen >= 4 && dst->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
> +	batch[i++] |= 6 + 2 * has_64b_reloc;
> +
> +	pitch = dst->stride;
> +	if (device->gen >= 4 && dst->tiling)
> +		pitch /= 4;
> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
> +
> +	batch[i++] = dst_y << 16 | dst_x;
> +	batch[i++] = (height + dst_y) << 16 | (width + dst_x);
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = src_y << 16 | src_x;
> +	pitch = src->stride;
> +	if (device->gen >= 4 && src->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +	gem_close(device->fd, obj[2].handle);
> +
> +	dst->gtt_offset = obj[0].offset;
> +	src->gtt_offset = obj[1].offset;
> +
> +	memcpy_blt(src->model, dst->model,
> +		   4u * src->width, 4u * dst->width,
> +		   src_x, src_y,
> +		   dst_x, dst_y,
> +		   width, height);
> +}
> +
> +enum start {
> +	ZERO,
> +	ABOVE,
> +	BELOW
> +};
> +
> +static int start_at(int x, enum start s)
> +{
> +	switch (s) {
> +	default:
> +	case ZERO:
> +		return 0;
> +	case ABOVE:
> +		return 1;
> +	case BELOW:
> +		return x - 1;
> +	}
> +}
> +
> +igt_main
> +{
> +	struct device device;
> +
> +	igt_fixture {
> +		device.fd = drm_open_driver_render(DRIVER_INTEL);
> +		igt_require_gem(device.fd);
> +
> +		device.pciid = intel_get_drm_devid(device.fd);
> +		device.gen = intel_gen(device.pciid);
> +		device.llc = gem_has_llc(device.fd);
> +	}
> +
> +	igt_subtest("basic") {
> +		struct buffer *src, *dst;
> +		unsigned int x, y;
> +
> +		for (unsigned int height = 1; height <= 16; height <<= 1) {
> +			for (unsigned int y0 = ZERO; y0 <= (height > 2 ? BELOW : ZERO); y0++) {
> +				for (unsigned int width = 1; width <= 64; width <<= 1) {
> +					for (unsigned int x0 = ZERO; x0 <= (width > 2 ? BELOW : ZERO); x0++) {
> +
> +						src = buffer_create(&device,
> +								    width * 16, height * 4);
> +						dst = buffer_create(&device,
> +								    width * 16, height * 4);
> +
> +						y = start_at(height, y0);
> +						for (unsigned int src_tiling = I915_TILING_NONE;
> +						     src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> +						     src_tiling++) {
> +							buffer_set_tiling(&device, src, src_tiling);
> +
> +							x = start_at(width, x0);
> +							for (unsigned int dst_tiling = I915_TILING_NONE;
> +							     dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> +							     dst_tiling++) {
> +								buffer_set_tiling(&device, dst, dst_tiling);
> +
> +								for (enum mode down = CPU; down <= WC; down++) {
> +									igt_debug("Testing src_tiling=%d, dst_tiling=%d, down=%d at (%d, %d) x (%d, %d)\n",
> +										  src_tiling,
> +										  dst_tiling,
> +										  down, x, y,
> +										  width, height);
> +
> +									igt_assert(x + width <= dst->width);
> +									igt_assert(y + height <= dst->height);
> +
> +									blit(&device,
> +									     src, x, y,
> +									     dst, x, y,
> +									     width, height);
> +									igt_assert(buffer_check(&device, dst, down));
> +
> +									x += width;
> +								}
> +							}
> +
> +							y += height;
> +						}
> +
> +						buffer_free(&device, dst);
> +						buffer_free(&device, src);
> +					}
> +				}
> +			}
> +		}
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 3f3eee277..98f2db555 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -106,6 +106,7 @@ i915_progs = [
>  	'gen3_render_tiledy_blits',
>  	'gem_bad_reloc',
>  	'gem_basic',
> +	'gem_blits',
>  	'gem_busy',
>  	'gem_caching',
>  	'gem_close',
> -- 
> 2.24.0
>
Code looks good, but I think that Vanshidhar Konda should take a closer look at
the details.
Kasia
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [igt-dev] ✗ Fi.CI.IGT: failure for igt: Another combinatorial exercise for blits (rev2)
  2019-11-05 11:15 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
                   ` (2 preceding siblings ...)
  2019-11-05 13:25 ` [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Katarzyna Dec
@ 2019-11-05 21:48 ` Patchwork
  3 siblings, 0 replies; 11+ messages in thread
From: Patchwork @ 2019-11-05 21:48 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: igt: Another combinatorial exercise for blits (rev2)
URL   : https://patchwork.freedesktop.org/series/47249/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_7261_full -> IGTPW_3651_full
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with IGTPW_3651_full absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_3651_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/index.html

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in IGTPW_3651_full:

### IGT changes ###

#### Possible regressions ####

  * igt@kms_plane@pixel-format-pipe-c-planes-source-clamping:
    - shard-iclb:         [PASS][1] -> [FAIL][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb8/igt@kms_plane@pixel-format-pipe-c-planes-source-clamping.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb7/igt@kms_plane@pixel-format-pipe-c-planes-source-clamping.html

  
#### Suppressed ####

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@kms_atomic_transition@4x-modeset-transitions-fencing:
    - {shard-tglb}:       [SKIP][3] ([fdo#112041]) -> [SKIP][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb2/igt@kms_atomic_transition@4x-modeset-transitions-fencing.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb9/igt@kms_atomic_transition@4x-modeset-transitions-fencing.html

  * igt@kms_chamelium@hdmi-cmp-nv16:
    - {shard-tglb}:       [SKIP][5] ([fdo#111827]) -> [SKIP][6] +4 similar issues
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb3/igt@kms_chamelium@hdmi-cmp-nv16.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb9/igt@kms_chamelium@hdmi-cmp-nv16.html

  * igt@kms_content_protection@mei_interface:
    - {shard-tglb}:       [SKIP][7] ([fdo#111828]) -> [SKIP][8]
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb1/igt@kms_content_protection@mei_interface.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb9/igt@kms_content_protection@mei_interface.html

  * igt@kms_flip@2x-blocking-absolute-wf_vblank-interruptible:
    - {shard-tglb}:       [SKIP][9] ([fdo#111825]) -> [SKIP][10] +18 similar issues
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb8/igt@kms_flip@2x-blocking-absolute-wf_vblank-interruptible.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb9/igt@kms_flip@2x-blocking-absolute-wf_vblank-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-2p-primscrn-shrfb-plflip-blt:
    - {shard-tglb}:       NOTRUN -> [SKIP][11] +9 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb9/igt@kms_frontbuffer_tracking@fbc-2p-primscrn-shrfb-plflip-blt.html

  
New tests
---------

  New tests have been introduced between CI_DRM_7261_full and IGTPW_3651_full:

### New IGT tests (1) ###

  * igt@gem_blits@basic:
    - Statuses : 7 pass(s)
    - Exec time: [1.49, 10.04] s

  

Known issues
------------

  Here are the changes found in IGTPW_3651_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_exec@basic-invalid-context-vcs1:
    - shard-iclb:         [PASS][12] -> [SKIP][13] ([fdo#112080]) +12 similar issues
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb2/igt@gem_ctx_exec@basic-invalid-context-vcs1.html
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb7/igt@gem_ctx_exec@basic-invalid-context-vcs1.html

  * igt@gem_ctx_isolation@vcs1-dirty-create:
    - shard-iclb:         [PASS][14] -> [SKIP][15] ([fdo#109276] / [fdo#112080]) +1 similar issue
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb4/igt@gem_ctx_isolation@vcs1-dirty-create.html
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb5/igt@gem_ctx_isolation@vcs1-dirty-create.html

  * igt@gem_exec_schedule@preemptive-hang-bsd:
    - shard-iclb:         [PASS][16] -> [SKIP][17] ([fdo#112146]) +6 similar issues
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb6/igt@gem_exec_schedule@preemptive-hang-bsd.html
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb2/igt@gem_exec_schedule@preemptive-hang-bsd.html

  * igt@gem_persistent_relocs@forked-interruptible-thrashing:
    - shard-kbl:          [PASS][18] -> [FAIL][19] ([fdo#112037])
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-kbl3/igt@gem_persistent_relocs@forked-interruptible-thrashing.html
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-kbl2/igt@gem_persistent_relocs@forked-interruptible-thrashing.html

  * igt@gem_userptr_blits@map-fixed-invalidate-busy:
    - shard-snb:          [PASS][20] -> [DMESG-WARN][21] ([fdo#111870]) +3 similar issues
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-snb2/igt@gem_userptr_blits@map-fixed-invalidate-busy.html
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-snb2/igt@gem_userptr_blits@map-fixed-invalidate-busy.html

  * igt@gem_userptr_blits@sync-unmap-after-close:
    - shard-hsw:          [PASS][22] -> [DMESG-WARN][23] ([fdo#111870]) +2 similar issues
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-hsw2/igt@gem_userptr_blits@sync-unmap-after-close.html
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-hsw7/igt@gem_userptr_blits@sync-unmap-after-close.html

  * igt@i915_selftest@live_hangcheck:
    - shard-hsw:          [PASS][24] -> [DMESG-FAIL][25] ([fdo#111991])
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-hsw1/igt@i915_selftest@live_hangcheck.html
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-hsw1/igt@i915_selftest@live_hangcheck.html

  * igt@kms_cursor_crc@pipe-c-cursor-suspend:
    - shard-kbl:          [PASS][26] -> [DMESG-WARN][27] ([fdo#108566]) +7 similar issues
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-kbl2/igt@kms_cursor_crc@pipe-c-cursor-suspend.html
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-kbl2/igt@kms_cursor_crc@pipe-c-cursor-suspend.html

  * igt@kms_cursor_legacy@cursora-vs-flipb-atomic:
    - shard-hsw:          [PASS][28] -> [INCOMPLETE][29] ([fdo#103540])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-hsw8/igt@kms_cursor_legacy@cursora-vs-flipb-atomic.html
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-hsw6/igt@kms_cursor_legacy@cursora-vs-flipb-atomic.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-apl:          [PASS][30] -> [DMESG-WARN][31] ([fdo#108566]) +2 similar issues
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-apl1/igt@kms_flip@flip-vs-suspend-interruptible.html
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-apl1/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-badstride:
    - shard-iclb:         [PASS][32] -> [FAIL][33] ([fdo#103167]) +3 similar issues
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb6/igt@kms_frontbuffer_tracking@fbc-badstride.html
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-badstride.html

  * igt@kms_psr@no_drrs:
    - shard-iclb:         [PASS][34] -> [FAIL][35] ([fdo#108341])
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb4/igt@kms_psr@no_drrs.html
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb1/igt@kms_psr@no_drrs.html

  * igt@kms_psr@psr2_cursor_render:
    - shard-iclb:         [PASS][36] -> [SKIP][37] ([fdo#109441]) +2 similar issues
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb2/igt@kms_psr@psr2_cursor_render.html
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb5/igt@kms_psr@psr2_cursor_render.html

  * igt@prime_busy@hang-bsd2:
    - shard-iclb:         [PASS][38] -> [SKIP][39] ([fdo#109276]) +18 similar issues
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb1/igt@prime_busy@hang-bsd2.html
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb6/igt@prime_busy@hang-bsd2.html

  
#### Possible fixes ####

  * igt@gem_busy@busy-vcs1:
    - shard-iclb:         [SKIP][40] ([fdo#112080]) -> [PASS][41] +12 similar issues
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb6/igt@gem_busy@busy-vcs1.html
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb4/igt@gem_busy@busy-vcs1.html

  * {igt@gem_ctx_persistence@vcs1-queued}:
    - shard-iclb:         [SKIP][42] ([fdo#109276] / [fdo#112080]) -> [PASS][43] +3 similar issues
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb6/igt@gem_ctx_persistence@vcs1-queued.html
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb1/igt@gem_ctx_persistence@vcs1-queued.html

  * igt@gem_exec_balancer@smoke:
    - shard-iclb:         [SKIP][44] ([fdo#110854]) -> [PASS][45]
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb6/igt@gem_exec_balancer@smoke.html
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb1/igt@gem_exec_balancer@smoke.html

  * igt@gem_exec_schedule@preempt-contexts-bsd2:
    - shard-iclb:         [SKIP][46] ([fdo#109276]) -> [PASS][47] +11 similar issues
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb6/igt@gem_exec_schedule@preempt-contexts-bsd2.html
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb4/igt@gem_exec_schedule@preempt-contexts-bsd2.html

  * igt@gem_exec_schedule@preempt-other-chain-bsd:
    - shard-iclb:         [SKIP][48] ([fdo#112146]) -> [PASS][49] +2 similar issues
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb2/igt@gem_exec_schedule@preempt-other-chain-bsd.html
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb6/igt@gem_exec_schedule@preempt-other-chain-bsd.html

  * igt@gem_exec_schedule@preempt-queue-render:
    - {shard-tglb}:       [INCOMPLETE][50] ([fdo#111606] / [fdo#111677]) -> [PASS][51]
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb6/igt@gem_exec_schedule@preempt-queue-render.html
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb2/igt@gem_exec_schedule@preempt-queue-render.html

  * {igt@gem_exec_suspend@basic-s0}:
    - {shard-tglb}:       [INCOMPLETE][52] ([fdo#111832]) -> [PASS][53]
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb9/igt@gem_exec_suspend@basic-s0.html
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb9/igt@gem_exec_suspend@basic-s0.html

  * igt@gem_persistent_relocs@forked-thrashing:
    - shard-snb:          [FAIL][54] ([fdo#112037]) -> [PASS][55]
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-snb1/igt@gem_persistent_relocs@forked-thrashing.html
   [55]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-snb2/igt@gem_persistent_relocs@forked-thrashing.html

  * igt@gem_userptr_blits@map-fixed-invalidate-busy:
    - shard-kbl:          [INCOMPLETE][56] ([fdo#103665]) -> [PASS][57]
   [56]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-kbl4/igt@gem_userptr_blits@map-fixed-invalidate-busy.html
   [57]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-kbl4/igt@gem_userptr_blits@map-fixed-invalidate-busy.html

  * igt@gem_userptr_blits@sync-unmap-cycles:
    - shard-snb:          [DMESG-WARN][58] ([fdo#111870]) -> [PASS][59] +2 similar issues
   [58]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-snb2/igt@gem_userptr_blits@sync-unmap-cycles.html
   [59]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-snb1/igt@gem_userptr_blits@sync-unmap-cycles.html
    - shard-hsw:          [DMESG-WARN][60] ([fdo#111870]) -> [PASS][61] +2 similar issues
   [60]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-hsw5/igt@gem_userptr_blits@sync-unmap-cycles.html
   [61]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-hsw5/igt@gem_userptr_blits@sync-unmap-cycles.html

  * igt@gem_workarounds@suspend-resume:
    - shard-apl:          [DMESG-WARN][62] ([fdo#108566]) -> [PASS][63]
   [62]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-apl6/igt@gem_workarounds@suspend-resume.html
   [63]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-apl1/igt@gem_workarounds@suspend-resume.html

  * igt@i915_suspend@debugfs-reader:
    - {shard-tglb}:       [INCOMPLETE][64] ([fdo#111832] / [fdo#111850]) -> [PASS][65] +1 similar issue
   [64]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb2/igt@i915_suspend@debugfs-reader.html
   [65]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb8/igt@i915_suspend@debugfs-reader.html

  * igt@kms_cursor_crc@pipe-b-cursor-256x256-onscreen:
    - shard-snb:          [SKIP][66] ([fdo#109271]) -> [PASS][67] +1 similar issue
   [66]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-snb6/igt@kms_cursor_crc@pipe-b-cursor-256x256-onscreen.html
   [67]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-snb7/igt@kms_cursor_crc@pipe-b-cursor-256x256-onscreen.html

  * igt@kms_cursor_legacy@cursora-vs-flipb-varying-size:
    - shard-hsw:          [SKIP][68] ([fdo#109271]) -> [PASS][69]
   [68]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-hsw4/igt@kms_cursor_legacy@cursora-vs-flipb-varying-size.html
   [69]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-hsw1/igt@kms_cursor_legacy@cursora-vs-flipb-varying-size.html

  * igt@kms_draw_crc@draw-method-xrgb2101010-mmap-wc-xtiled:
    - shard-kbl:          [TIMEOUT][70] ([fdo#111800]) -> [PASS][71]
   [70]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-kbl4/igt@kms_draw_crc@draw-method-xrgb2101010-mmap-wc-xtiled.html
   [71]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-kbl4/igt@kms_draw_crc@draw-method-xrgb2101010-mmap-wc-xtiled.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-hsw:          [INCOMPLETE][72] ([fdo#103540]) -> [PASS][73]
   [72]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-hsw6/igt@kms_flip@flip-vs-suspend-interruptible.html
   [73]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-hsw4/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-render:
    - shard-iclb:         [FAIL][74] ([fdo#103167]) -> [PASS][75] +2 similar issues
   [74]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-render.html
   [75]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb5/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-cur-indfb-draw-render.html

  * igt@kms_frontbuffer_tracking@fbc-suspend:
    - shard-kbl:          [DMESG-WARN][76] ([fdo#108566]) -> [PASS][77] +5 similar issues
   [76]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-kbl3/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [77]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-kbl6/igt@kms_frontbuffer_tracking@fbc-suspend.html
    - {shard-tglb}:       [INCOMPLETE][78] ([fdo#111832] / [fdo#111850] / [fdo#111884]) -> [PASS][79]
   [78]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-tglb7/igt@kms_frontbuffer_tracking@fbc-suspend.html
   [79]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-tglb4/igt@kms_frontbuffer_tracking@fbc-suspend.html

  * igt@kms_plane_lowres@pipe-a-tiling-y:
    - shard-iclb:         [FAIL][80] ([fdo#103166]) -> [PASS][81]
   [80]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb1/igt@kms_plane_lowres@pipe-a-tiling-y.html
   [81]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb8/igt@kms_plane_lowres@pipe-a-tiling-y.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [SKIP][82] ([fdo#109441]) -> [PASS][83]
   [82]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb6/igt@kms_psr@psr2_primary_mmap_cpu.html
   [83]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_setmode@basic:
    - shard-hsw:          [FAIL][84] ([fdo#99912]) -> [PASS][85]
   [84]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-hsw4/igt@kms_setmode@basic.html
   [85]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-hsw7/igt@kms_setmode@basic.html

  * igt@perf_pmu@cpu-hotplug:
    - shard-kbl:          [TIMEOUT][86] ([fdo#111546] / [fdo#111800]) -> [PASS][87]
   [86]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-kbl4/igt@perf_pmu@cpu-hotplug.html
   [87]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-kbl1/igt@perf_pmu@cpu-hotplug.html

  
#### Warnings ####

  * igt@gem_mocs_settings@mocs-reset-bsd2:
    - shard-iclb:         [SKIP][88] ([fdo#109276]) -> [FAIL][89] ([fdo#111330])
   [88]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_7261/shard-iclb8/igt@gem_mocs_settings@mocs-reset-bsd2.html
   [89]: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/shard-iclb1/igt@gem_mocs_settings@mocs-reset-bsd2.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#112000]: https://bugs.freedesktop.org/show_bug.cgi?id=112000
  [fdo#103166]: https://bugs.freedesktop.org/show_bug.cgi?id=103166
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103665]: https://bugs.freedesktop.org/show_bug.cgi?id=103665
  [fdo#105411]: https://bugs.freedesktop.org/show_bug.cgi?id=105411
  [fdo#108341]: https://bugs.freedesktop.org/show_bug.cgi?id=108341
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109276]: https://bugs.freedesktop.org/show_bug.cgi?id=109276
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#110854]: https://bugs.freedesktop.org/show_bug.cgi?id=110854
  [fdo#111329]: https://bugs.freedesktop.org/show_bug.cgi?id=111329
  [fdo#111330]: https://bugs.freedesktop.org/show_bug.cgi?id=111330
  [fdo#111546]: https://bugs.freedesktop.org/show_bug.cgi?id=111546
  [fdo#111606]: https://bugs.freedesktop.org/show_bug.cgi?id=111606
  [fdo#111647]: https://bugs.freedesktop.org/show_bug.cgi?id=111647
  [fdo#111677]: https://bugs.freedesktop.org/show_bug.cgi?id=111677
  [fdo#111703]: https://bugs.freedesktop.org/show_bug.cgi?id=111703
  [fdo#111747]: https://bugs.freedesktop.org/show_bug.cgi?id=111747
  [fdo#111766]: https://bugs.freedesktop.org/show_bug.cgi?id=111766
  [fdo#111781]: https://bugs.freedesktop.org/show_bug.cgi?id=111781
  [fdo#111800]: https://bugs.freedesktop.org/show_bug.cgi?id=111800
  [fdo#111825]: https://bugs.freedesktop.org/show_bug.cgi?id=111825
  [fdo#111827]: https://bugs.freedesktop.org/show_bug.cgi?id=111827
  [fdo#111828]: https://bugs.freedesktop.org/show_bug.cgi?id=111828
  [fdo#111831]: https://bugs.freedesktop.org/show_bug.cgi?id=111831
  [fdo#111832]: https://bugs.freedesktop.org/show_bug.cgi?id=111832
  [fdo#111850]: https://bugs.freedesktop.org/show_bug.cgi?id=111850
  [fdo#111870]: https://bugs.freedesktop.org/show_bug.cgi?id=111870
  [fdo#111884]: https://bugs.freedesktop.org/show_bug.cgi?id=111884
  [fdo#111991]: https://bugs.freedesktop.org/show_bug.cgi?id=111991
  [fdo#111998]: https://bugs.freedesktop.org/show_bug.cgi?id=111998
  [fdo#112016]: https://bugs.freedesktop.org/show_bug.cgi?id=112016
  [fdo#112021]: https://bugs.freedesktop.org/show_bug.cgi?id=112021

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_3651/index.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2019-11-05 13:25 ` [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Katarzyna Dec
@ 2019-11-05 23:50   ` Vanshidhar Konda
  2019-11-06  9:22     ` Chris Wilson
  0 siblings, 1 reply; 11+ messages in thread
From: Vanshidhar Konda @ 2019-11-05 23:50 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

On Tue, Nov 05, 2019 at 02:25:28PM +0100, Katarzyna Dec wrote:
>On Tue, Nov 05, 2019 at 11:15:13AM +0000, Chris Wilson wrote:
>> The aim of this test is to combine gem_linear_blits, gem_tiled_blits etc
>> into one test runner that covers investigation into HW alignment issues
>> as well as driver boundaries (relocs, access, thrashing).
>>
>> See also gem_concurrent_blits which looks for coherency issues between
>> parallel execution.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Katarzyna Dec <katarzyna.dec@intel.com>
>> Cc: Vanshidhar Konda <vanshidhar.r.konda@intel.com>
>> ---
>>  tests/Makefile.sources |   1 +
>>  tests/i915/gem_blits.c | 796 +++++++++++++++++++++++++++++++++++++++++
>>  tests/meson.build      |   1 +
>>  3 files changed, 798 insertions(+)
>>  create mode 100644 tests/i915/gem_blits.c
>>
>> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
>> index 093eb57f3..f63602ef0 100644
>> --- a/tests/Makefile.sources
>> +++ b/tests/Makefile.sources
>> @@ -24,6 +24,7 @@ TESTS_progs = \
>>  	drm_import_export \
>>  	drm_mm \
>>  	drm_read \
>> +	i915/gem_blits \
>>  	kms_3d \
>>  	kms_addfb_basic \
>>  	kms_atomic \
>> diff --git a/tests/i915/gem_blits.c b/tests/i915/gem_blits.c
>> new file mode 100644
>> index 000000000..e6a840ffb
>> --- /dev/null
>> +++ b/tests/i915/gem_blits.c
>> @@ -0,0 +1,796 @@
>> +/*
>> + * Copyright © 2018 Intel Corporation
>Minor typo, but as this is a new file, it would be good to update date :)
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include "igt.h"
>> +#include "igt_x86.h"
>> +
>> +#define MI_FLUSH_DW (0x26 << 23)
>> +
>> +#define BCS_SWCTRL 0x22200
>> +#define BCS_SRC_Y (1 << 0)
>> +#define BCS_DST_Y (1 << 1)
>> +
>> +struct device {
>> +	int fd;
>> +	int gen;
>> +	int pciid;
>> +	int llc;
>> +};
>> +
>> +struct buffer {
>> +	uint32_t handle;
>> +	uint16_t width;
>> +	uint16_t height;
>> +	uint16_t stride;
>> +	uint32_t size;
>> +	unsigned int tiling;
>> +	unsigned int caching;
>> +	uint64_t gtt_offset;
>> +	uint32_t model[] __attribute__((aligned(16)));
>> +};
>> +
>> +enum mode {
>> +	CPU,
>> +	PRW,
>> +	GTT,
>> +	WC,
>> +};
>> +
>> +static int fls(uint64_t x)
>> +{
>> +	int t;
>> +
>> +	for (t = 0; x >> t; t++)
>> +		;
>> +
>> +	return t;
>> +}
>> +
>> +static unsigned int
>> +get_tiling_stride(const struct device *device,
>> +		  unsigned int width, unsigned int tiling)
>> +{
>> +	unsigned int stride = 4u * width;
>> +
>> +	if (tiling) {
>> +		if (device->gen < 3)
>> +			stride = ALIGN(stride, 128);
>> +		else if (device->gen < 4 || tiling == I915_TILING_X)
>> +			stride = ALIGN(stride, 512);
>> +		else
>> +			stride = ALIGN(stride, 128);
>> +		if (device->gen < 4)
>> +			stride = 1 << fls(stride - 1);
>> +	} else {
>> +		if (device->gen >= 8)
>> +			stride = ALIGN(stride, 64);
>> +	}
>> +
>> +	igt_assert(stride < UINT16_MAX && stride >= 4*width);
>> +	return stride;
>> +}
>> +
>> +static unsigned int
>> +get_tiling_height(const struct device *device,
>> +		  unsigned int height, unsigned int tiling)
>> +{
>> +	if (!tiling)
>> +		return height;
>> +
>> +	if (device->gen < 3)
>> +		return ALIGN(height, 16);
>> +	else if (device->gen < 4 || tiling == I915_TILING_X)
>> +		return ALIGN(height, 8);
>> +	else
>> +		return ALIGN(height, 32);
>> +}
>> +
>> +static struct buffer *buffer_create(const struct device *device,
>> +				    unsigned int width,
>> +				    unsigned int height)
>> +{
>> +	struct buffer *buffer;
>> +
>> +	igt_assert(width && height);
>> +
>> +	buffer = malloc(sizeof(*buffer) + 4u * width * height);
>> +	if (!buffer)
>> +		return NULL;
>> +
>> +	buffer->width = width;
>> +	buffer->height = height;
>> +
>> +	buffer->tiling = I915_TILING_NONE;
>> +	buffer->stride = get_tiling_stride(device, width, I915_TILING_NONE);
>> +	buffer->size = ALIGN(buffer->stride * height, 4096);
>> +	buffer->handle = gem_create(device->fd, buffer->size);
>> +	buffer->caching = device->llc;
>> +
>> +	buffer->gtt_offset = buffer->handle * buffer->size;
>> +
>> +	for (int y = 0; y < height; y++) {
>> +		uint32_t *row = buffer->model + y * width;
>> +
>> +		for (int x = 0; x < width; x++)
>> +			row[x] = (y << 16 | x) ^ buffer->handle;
>> +
>> +		gem_write(device->fd,
>> +			  buffer->handle, 4u * y * width,
>> +			  row, 4u * width);
>> +	}
>> +
>> +	return buffer;
>> +}
>> +
>> +static void buffer_set_tiling(const struct device *device,
>> +			      struct buffer *buffer,
>> +			      unsigned int tiling)
>> +{
>> +	struct drm_i915_gem_exec_object2 obj[3];
>> +	struct drm_i915_gem_relocation_entry reloc[2];
>> +	struct drm_i915_gem_execbuffer2 execbuf;
>> +	const bool has_64b_reloc = device->gen >= 8;
>> +	uint32_t stride, size, pitch;
>> +	uint32_t *batch;
>> +	int i;
>> +
>> +	if (buffer->tiling == tiling)
>> +		return;
>> +
>> +	stride = get_tiling_stride(device, buffer->width, tiling);
>> +	size = stride * get_tiling_height(device, buffer->height, tiling);
>> +	size = ALIGN(size, 4096);
>> +
>> +	memset(&execbuf, 0, sizeof(execbuf));
>> +	execbuf.buffers_ptr = to_user_pointer(obj);
>> +	execbuf.buffer_count = ARRAY_SIZE(obj);
>> +	if (device->gen >= 6)
>> +		execbuf.flags = I915_EXEC_BLT;
>> +
>> +	memset(obj, 0, sizeof(obj));
>> +	obj[0].handle = gem_create(device->fd, size);
>> +	if (tiling) {
>> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
>> +		gem_set_tiling(device->fd, obj[0].handle, tiling, stride);
>> +	}
>> +
>> +	obj[1].handle = buffer->handle;
>> +	obj[1].offset = buffer->gtt_offset;
>> +	if (buffer->tiling)
>> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
>> +
>> +	obj[2].handle = gem_create(device->fd, 4096);
>> +	obj[2].relocs_ptr = to_user_pointer(memset(reloc, 0, sizeof(reloc)));
>> +	obj[2].relocation_count = 2;
>> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
>> +
>> +	i = 0;
>> +
>> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
>> +		unsigned int mask;
>> +
>> +		batch[i++] = MI_LOAD_REGISTER_IMM;
>> +		batch[i++] = BCS_SWCTRL;
>> +
>> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> +		if (buffer->tiling == I915_TILING_Y)
>> +			mask |= BCS_SRC_Y;
>> +		if (tiling == I915_TILING_Y)
>> +			mask |= BCS_DST_Y;
>> +		batch[i++] = mask;
>> +	}
>> +
>> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
>> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
>> +		    XY_SRC_COPY_BLT_WRITE_RGB);
>> +	if (device->gen >= 4 && buffer->tiling)
>> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
>> +	if (device->gen >= 4 && tiling)
>> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
>> +	batch[i++] |= 6 + 2 * has_64b_reloc;
>> +
>> +	pitch = stride;
>> +	if (device->gen >= 4 && tiling)
>> +		pitch /= 4;
>> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
>> +	batch[i++] = 0;
>> +	batch[i++] = buffer->height << 16 | buffer->width;
>> +	reloc[0].target_handle = obj[0].handle;
>> +	reloc[0].presumed_offset = obj[0].offset;
>> +	reloc[0].offset = sizeof(*batch) * i;
>> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
>> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>> +	batch[i++] = obj[0].offset;
>> +	if (has_64b_reloc)
>> +		batch[i++] = obj[0].offset >> 32;
>> +
>> +	batch[i++] = 0;
>> +	pitch = buffer->stride;
>> +	if (device->gen >= 4 && buffer->tiling)
>> +		pitch /= 4;
>> +	batch[i++] = pitch;
>> +	reloc[1].target_handle = obj[1].handle;
>> +	reloc[1].presumed_offset = obj[1].offset;
>> +	reloc[1].offset = sizeof(*batch) * i;
>> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
>> +	batch[i++] = obj[1].offset;
>> +	if (has_64b_reloc)
>> +		batch[i++] = obj[1].offset >> 32;
>> +
>> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
>> +		igt_assert(device->gen >= 6);
>> +		batch[i++] = MI_FLUSH_DW | 2;
>> +		batch[i++] = 0;
>> +		batch[i++] = 0;
>> +		batch[i++] = 0;
>> +
>> +		batch[i++] = MI_LOAD_REGISTER_IMM;
>> +		batch[i++] = BCS_SWCTRL;
>> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> +	}
>> +
>> +	batch[i++] = MI_BATCH_BUFFER_END;
>> +	munmap(batch, 4096);
>> +
>> +	gem_execbuf(device->fd, &execbuf);
>> +
>> +	gem_close(device->fd, obj[2].handle);
>> +	gem_close(device->fd, obj[1].handle);
>> +
>> +	buffer->gtt_offset = obj[0].offset;
>> +	buffer->handle = obj[0].handle;
>> +
>> +	buffer->tiling = tiling;
>> +	buffer->stride = stride;
>> +	buffer->size = size;
>> +}
>> +
>> +static bool can_blit_to_linear(const struct device *device,
>> +			       const struct buffer *buffer)
>> +{
>> +	if (buffer->caching && !device->llc)
>> +		return false;
>> +
>> +	if (device->gen < 3)
>> +		return false;
>> +
>> +	return true;
>> +}
>> +
>> +static bool blit_to_linear(const struct device *device,
>> +			   const struct buffer *buffer,
>> +			   void *linear)
>> +{
>> +	struct drm_i915_gem_exec_object2 obj[3];
>> +	struct drm_i915_gem_relocation_entry reloc[2];
>> +	struct drm_i915_gem_execbuffer2 execbuf;
>> +	const bool has_64b_reloc = device->gen >= 8;
>> +	uint32_t *batch;
>> +	uint32_t pitch;
>> +	int i = 0;
>> +
>> +	igt_assert(buffer->tiling);
>> +
>> +	if (!can_blit_to_linear(device, buffer))
>> +		return false;
>> +
>> +	memset(&execbuf, 0, sizeof(execbuf));
>> +	execbuf.buffers_ptr = to_user_pointer(obj);
>> +	execbuf.buffer_count = ARRAY_SIZE(obj);
>> +	if (device->gen >= 6)
>> +		execbuf.flags = I915_EXEC_BLT;
>> +
>> +	memset(obj, 0, sizeof(obj));
>> +	if (__gem_userptr(device->fd, linear, buffer->size, 0, 0, &obj[0].handle))
>> +		return false;
>> +
>> +	obj[1].handle = buffer->handle;
>> +	obj[1].offset = buffer->gtt_offset;
>> +	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
>> +
>> +	memset(reloc, 0, sizeof(reloc));
>> +	obj[2].handle = gem_create(device->fd, 4096);
>> +	obj[2].relocs_ptr = to_user_pointer(reloc);
>> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
>> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
>> +
>> +	if (buffer->tiling >= I915_TILING_Y) {
>> +		unsigned int mask;
>> +
>> +		batch[i++] = MI_LOAD_REGISTER_IMM;
>> +		batch[i++] = BCS_SWCTRL;
>> +
>> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> +		if (buffer->tiling == I915_TILING_Y)
>> +			mask |= BCS_SRC_Y;
>> +		batch[i++] = mask;
>> +	}
>> +
>> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
>> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
>> +		    XY_SRC_COPY_BLT_WRITE_RGB);
>> +	if (device->gen >= 4 && buffer->tiling)
>> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
>> +	batch[i++] |= 6 + 2 * has_64b_reloc;
>> +
>> +	batch[i++] = 3 << 24 | 0xcc << 16 | buffer->stride;
>> +	batch[i++] = 0;
>> +	batch[i++] = buffer->height << 16 | buffer->width;
>> +	reloc[0].target_handle = obj[0].handle;
>> +	reloc[0].presumed_offset = obj[0].offset;
>> +	reloc[0].offset = sizeof(*batch) * i;
>> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
>> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>> +	batch[i++] = obj[0].offset;
>> +	if (has_64b_reloc)
>> +		batch[i++] = obj[0].offset >> 32;
>> +
>> +	batch[i++] = 0;
>> +	pitch = buffer->stride;
>> +	if (device->gen >= 4 && buffer->tiling)
>> +		pitch /= 4;
>> +	batch[i++] = pitch;
>> +	reloc[1].target_handle = obj[1].handle;
>> +	reloc[1].presumed_offset = obj[1].offset;
>> +	reloc[1].offset = sizeof(*batch) * i;
>> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
>> +	batch[i++] = obj[1].offset;
>> +	if (has_64b_reloc)
>> +		batch[i++] = obj[1].offset >> 32;
>> +
>> +	if (buffer->tiling >= I915_TILING_Y) {
>> +		igt_assert(device->gen >= 6);
>> +		batch[i++] = MI_FLUSH_DW | 2;
>> +		batch[i++] = 0;
>> +		batch[i++] = 0;
>> +		batch[i++] = 0;
>> +
>> +		batch[i++] = MI_LOAD_REGISTER_IMM;
>> +		batch[i++] = BCS_SWCTRL;
>> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> +	}
>> +
>> +	batch[i++] = MI_BATCH_BUFFER_END;
>> +	munmap(batch, 4096);
>> +
>> +	gem_execbuf(device->fd, &execbuf);
>> +	gem_close(device->fd, obj[2].handle);
>> +
>> +	gem_sync(device->fd, obj[0].handle);
>> +	gem_close(device->fd, obj[0].handle);
>> +
>> +	return true;
>> +}
>> +
>> +static void *download(const struct device *device,
>> +		      const struct buffer *buffer,
>> +		      enum mode mode)
>> +{
>> +	void *linear, *src;
>> +
>> +	igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
>> +
>> +	switch (mode) {
>> +	case CPU:
>> +		if (buffer->tiling) {
>> +			if (blit_to_linear(device, buffer, linear))
>> +				return linear;
>> +
>> +			mode = GTT;
>> +		}
>> +		break;
>> +
>> +	case WC:
>> +		if (!gem_mmap__has_wc(device->fd) || buffer->tiling)
>> +			mode = GTT;
>> +		break;
>> +
>> +	case PRW:
>> +		if (buffer->tiling)
>> +			mode = GTT;
>> +		break;
>> +
>> +	case GTT:
>> +		break;
>> +	}
>> +
>> +	switch (mode) {
>> +	case CPU:
>> +		src = gem_mmap__cpu(device->fd, buffer->handle,
>> +				    0, buffer->size,
>> +				    PROT_READ);
>> +
>> +		gem_set_domain(device->fd, buffer->handle,
>> +			       I915_GEM_DOMAIN_CPU, 0);
>> +		igt_memcpy_from_wc(linear, src, buffer->size);
>> +		munmap(src, buffer->size);
>> +		break;
>> +
>> +	case WC:
>> +		src = gem_mmap__wc(device->fd, buffer->handle,
>> +				   0, buffer->size,
>> +				   PROT_READ);
>> +
>> +		gem_set_domain(device->fd, buffer->handle,
>> +			       I915_GEM_DOMAIN_WC, 0);
>> +		igt_memcpy_from_wc(linear, src, buffer->size);
>> +		munmap(src, buffer->size);
>> +		break;
>> +
>> +	case GTT:
>> +		src = gem_mmap__gtt(device->fd, buffer->handle,
>> +				   buffer->size,
>> +				   PROT_READ);
>> +
>> +		gem_set_domain(device->fd, buffer->handle,
>> +			       I915_GEM_DOMAIN_GTT, 0);
>> +		igt_memcpy_from_wc(linear, src, buffer->size);
>> +		munmap(src, buffer->size);
>> +		break;
>> +
>> +	case PRW:
>> +		gem_read(device->fd, buffer->handle, 0, linear, buffer->size);
>> +		break;
>> +	}
>> +
>> +	return linear;
>> +}
>> +
>> +static bool buffer_check(const struct device *device,
>> +			 const struct buffer *buffer,
>> +			 enum mode mode)
>> +{
>> +	unsigned int num_errors = 0;
>> +	uint32_t *linear;
>> +
>> +	linear = download(device, buffer, mode);
>> +	igt_assert(linear);
>> +
>> +	for (int y = 0; y < buffer->height; y++) {
>> +		const uint32_t *model = buffer->model + y * buffer->width;
>> +		const uint32_t *row =
>> +			linear + y * buffer->stride / sizeof(uint32_t);
>> +
>> +		if (!memcmp(model, row, buffer->width * sizeof(uint32_t)))
>> +			continue;
>> +
>> +		for (int x = 0; x < buffer->width; x++) {
>> +			if (row[x] != model[x] && num_errors++ < 5) {
>> +				igt_warn("buffer handle=%d mismatch at (%d, %d): expected %08x, found %08x\n",
>> +					 buffer->handle,
>> +					 x, y, model[x], row[x]);
>> +			}
>> +		}
>> +	}
>> +
>> +	free(linear);
>> +
>> +	return num_errors == 0;
>> +}
>> +
>> +static void buffer_free(const struct device *device, struct buffer *buffer)
>> +{
>> +	igt_assert(buffer_check(device, buffer, GTT));
>> +	gem_close(device->fd, buffer->handle);
>> +	free(buffer);
>> +}
>> +
>> +static void memcpy_blt(const void *src, void *dst,
>> +		       uint32_t src_stride, uint32_t dst_stride,
>> +		       uint16_t src_x, uint16_t src_y,
>> +		       uint16_t dst_x, uint16_t dst_y,
>> +		       uint16_t width, uint16_t height)
>> +{
>> +	const uint8_t *src_bytes;
>> +	uint8_t *dst_bytes;
>> +	int byte_width;
>> +
>> +	src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * 4;
>> +	dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * 4;
>> +
>> +	byte_width = width * 4;
>> +	if (byte_width == src_stride && byte_width == dst_stride) {
>> +		byte_width *= height;
>> +		height = 1;
>> +	}
>> +
>> +	switch (byte_width) {
>> +	case 4:
>> +		do {
>> +			*(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes;
>> +			src_bytes += src_stride;
>> +			dst_bytes += dst_stride;
>> +		} while (--height);
>> +		break;
>> +
>> +	case 8:
>> +		do {
>> +			*(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes;
>> +			src_bytes += src_stride;
>> +			dst_bytes += dst_stride;
>> +		} while (--height);
>> +		break;
>> +	case 16:
>> +		do {
>> +			((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
>> +			((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
>> +			src_bytes += src_stride;
>> +			dst_bytes += dst_stride;
>> +		} while (--height);
>> +		break;
>> +
>> +	default:
>> +		do {
>> +			memcpy(dst_bytes, src_bytes, byte_width);
>> +			src_bytes += src_stride;
>> +			dst_bytes += dst_stride;
>> +		} while (--height);
>> +		break;
>> +	}
>> +}
>> +
>> +static void
>> +blit(const struct device *device,
>> +     struct buffer *src, uint16_t src_x, uint16_t src_y,
>> +     struct buffer *dst, uint16_t dst_x, uint16_t dst_y,
>> +     uint16_t width, uint16_t height)
>> +
>> +{
>> +	struct drm_i915_gem_exec_object2 obj[3];
>> +	struct drm_i915_gem_relocation_entry reloc[2];
>> +	struct drm_i915_gem_execbuffer2 execbuf;
>> +	const bool has_64b_reloc = device->gen >= 8;
>> +	uint32_t *batch;
>> +	uint32_t pitch;
>> +	int i = 0;
>> +
>> +	if (src_x < 0) {
>> +		width += src_x;
>> +		dst_x -= src_x;
>> +		src_x  = 0;
>> +	}
>> +	if (src_y < 0) {
>> +		height += src_y;
>> +		dst_y  -= src_y;
>> +		src_y   = 0;
>> +	}
>> +
>> +	if (dst_x < 0) {
>> +		width += dst_x;
>> +		src_x -= dst_x;
>> +		dst_x  = 0;
>> +	}
>> +	if (dst_y < 0) {
>> +		height += dst_y;
>> +		src_y  -= dst_y;
>> +		dst_y   = 0;
>> +	}
>> +
>> +	if (src_x + width > src->width)
>> +		width = src->width - src_x;
>> +	if (dst_x + width > dst->width)
>> +		width = dst->width - dst_x;
>> +
>> +	if (src_y + height > src->height)
>> +		height = src->height - src_y;
>> +	if (dst_y + height > dst->height)
>> +		height = dst->height - dst_y;
>> +
>> +	if (dst->caching) {
>> +		igt_assert(device->gen >= 3);
>> +		igt_assert(device->llc || !src->caching);
>> +	}
>> +
>> +	memset(&execbuf, 0, sizeof(execbuf));
>> +	execbuf.buffers_ptr = to_user_pointer(obj);
>> +	execbuf.buffer_count = ARRAY_SIZE(obj);
>> +	if (device->gen >= 6)
>> +		execbuf.flags = I915_EXEC_BLT;
>> +
>> +	memset(obj, 0, sizeof(obj));
>> +	obj[0].handle = dst->handle;
>> +	obj[0].offset = dst->gtt_offset;
>> +	if (dst->tiling)
>> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
>> +
>> +	obj[1].handle = src->handle;
>> +	obj[1].offset = src->gtt_offset;
>> +	if (src->tiling)
>> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
>> +
>> +	memset(reloc, 0, sizeof(reloc));
>> +	obj[2].handle = gem_create(device->fd, 4096);
>> +	obj[2].relocs_ptr = to_user_pointer(reloc);
>> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
>> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
>> +
>> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
>> +		unsigned int mask;
>> +
>> +		batch[i++] = MI_LOAD_REGISTER_IMM;
>> +		batch[i++] = BCS_SWCTRL;
>> +
>> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> +		if (src->tiling == I915_TILING_Y)
>> +			mask |= BCS_SRC_Y;
>> +		if (dst->tiling == I915_TILING_Y)
>> +			mask |= BCS_DST_Y;
>> +		batch[i++] = mask;
>> +	}
>> +
>> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
>> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
>> +		    XY_SRC_COPY_BLT_WRITE_RGB);
>> +	if (device->gen >= 4 && src->tiling)
>> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
>> +	if (device->gen >= 4 && dst->tiling)
>> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
>> +	batch[i++] |= 6 + 2 * has_64b_reloc;
>> +
>> +	pitch = dst->stride;
>> +	if (device->gen >= 4 && dst->tiling)
>> +		pitch /= 4;
>> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
>> +
>> +	batch[i++] = dst_y << 16 | dst_x;
>> +	batch[i++] = (height + dst_y) << 16 | (width + dst_x);
>> +	reloc[0].target_handle = obj[0].handle;
>> +	reloc[0].presumed_offset = obj[0].offset;
>> +	reloc[0].offset = sizeof(*batch) * i;
>> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
>> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>> +	batch[i++] = obj[0].offset;
>> +	if (has_64b_reloc)
>> +		batch[i++] = obj[0].offset >> 32;
>> +
>> +	batch[i++] = src_y << 16 | src_x;
>> +	pitch = src->stride;
>> +	if (device->gen >= 4 && src->tiling)
>> +		pitch /= 4;
>> +	batch[i++] = pitch;
>> +	reloc[1].target_handle = obj[1].handle;
>> +	reloc[1].presumed_offset = obj[1].offset;
>> +	reloc[1].offset = sizeof(*batch) * i;
>> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
>> +	batch[i++] = obj[1].offset;
>> +	if (has_64b_reloc)
>> +		batch[i++] = obj[1].offset >> 32;
>> +
>> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
>> +		igt_assert(device->gen >= 6);
>> +		batch[i++] = MI_FLUSH_DW | 2;
>> +		batch[i++] = 0;
>> +		batch[i++] = 0;
>> +		batch[i++] = 0;
>> +
>> +		batch[i++] = MI_LOAD_REGISTER_IMM;
>> +		batch[i++] = BCS_SWCTRL;
>> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
>> +	}
>> +
>> +	batch[i++] = MI_BATCH_BUFFER_END;
>> +	munmap(batch, 4096);
>> +
>> +	gem_execbuf(device->fd, &execbuf);
>> +	gem_close(device->fd, obj[2].handle);
>> +
>> +	dst->gtt_offset = obj[0].offset;
>> +	src->gtt_offset = obj[1].offset;
>> +
>> +	memcpy_blt(src->model, dst->model,
>> +		   4u * src->width, 4u * dst->width,
>> +		   src_x, src_y,
>> +		   dst_x, dst_y,
>> +		   width, height);
>> +}
>> +
>> +enum start {
>> +	ZERO,
>> +	ABOVE,
>> +	BELOW
>> +};
>> +
>> +static int start_at(int x, enum start s)
>> +{
>> +	switch (s) {
>> +	default:
>> +	case ZERO:
>> +		return 0;
>> +	case ABOVE:
>> +		return 1;
>> +	case BELOW:
>> +		return x - 1;
>> +	}
>> +}
>> +
>> +igt_main
>> +{
>> +	struct device device;
>> +
>> +	igt_fixture {
>> +		device.fd = drm_open_driver_render(DRIVER_INTEL);
>> +		igt_require_gem(device.fd);
>> +
>> +		device.pciid = intel_get_drm_devid(device.fd);
>> +		device.gen = intel_gen(device.pciid);
>> +		device.llc = gem_has_llc(device.fd);
>> +	}
>> +
>> +	igt_subtest("basic") {
>> +		struct buffer *src, *dst;
>> +		unsigned int x, y;
>> +
>> +		for (unsigned int height = 1; height <= 16; height <<= 1) {
>> +			for (unsigned int y0 = ZERO; y0 <= (height > 2 ? BELOW : ZERO); y0++) {
>> +				for (unsigned int width = 1; width <= 64; width <<= 1) {
>> +					for (unsigned int x0 = ZERO; x0 <= (width > 2 ? BELOW : ZERO); x0++) {
>> +
>> +						src = buffer_create(&device,
>> +								    width * 16, height * 4);
>> +						dst = buffer_create(&device,
>> +								    width * 16, height * 4);
>> +
>> +						y = start_at(height, y0);
>> +						for (unsigned int src_tiling = I915_TILING_NONE;
>> +						     src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
>> +						     src_tiling++) {
>> +							buffer_set_tiling(&device, src, src_tiling);
>> +
>> +							x = start_at(width, x0);
>> +							for (unsigned int dst_tiling = I915_TILING_NONE;
>> +							     dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
>> +							     dst_tiling++) {
>> +								buffer_set_tiling(&device, dst, dst_tiling);
>> +
>> +								for (enum mode down = CPU; down <= WC; down++) {

It seems like mode is only used in the buffer_check function, unless I
missed something. Maybe instead of looping over mode and doing the same
blit operation multiple times we can just do mode = (width % 3)?

Other than that, looks good to me.

Reviewed-by: Vanshidhar Konda <vanshidhar.r.konda@intel.com>

>> +									igt_debug("Testing src_tiling=%d, dst_tiling=%d, down=%d at (%d, %d) x (%d, %d)\n",
>> +										  src_tiling,
>> +										  dst_tiling,
>> +										  down, x, y,
>> +										  width, height);
>> +
>> +									igt_assert(x + width <= dst->width);
>> +									igt_assert(y + height <= dst->height);
>> +
>> +									blit(&device,
>> +									     src, x, y,
>> +									     dst, x, y,
>> +									     width, height);
>> +									igt_assert(buffer_check(&device, dst, down));
>> +
>> +									x += width;
>> +								}
>> +							}
>> +
>> +							y += height;
>> +						}
>> +
>> +						buffer_free(&device, dst);
>> +						buffer_free(&device, src);
>> +					}
>> +				}
>> +			}
>> +		}
>> +	}
>> +}
>> diff --git a/tests/meson.build b/tests/meson.build
>> index 3f3eee277..98f2db555 100644
>> --- a/tests/meson.build
>> +++ b/tests/meson.build
>> @@ -106,6 +106,7 @@ i915_progs = [
>>  	'gen3_render_tiledy_blits',
>>  	'gem_bad_reloc',
>>  	'gem_basic',
>> +	'gem_blits',
>>  	'gem_busy',
>>  	'gem_caching',
>>  	'gem_close',
>> --
>> 2.24.0
>>
>Code looks good, but I think that Vanshidhar Konda should look more closely at
>details.
>Kasia
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2019-11-05 23:50   ` Vanshidhar Konda
@ 2019-11-06  9:22     ` Chris Wilson
  0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2019-11-06  9:22 UTC (permalink / raw)
  To: Vanshidhar Konda; +Cc: igt-dev

Quoting Vanshidhar Konda (2019-11-05 23:50:57)
> On Tue, Nov 05, 2019 at 02:25:28PM +0100, Katarzyna Dec wrote:
> >On Tue, Nov 05, 2019 at 11:15:13AM +0000, Chris Wilson wrote:
> >> +    igt_subtest("basic") {
> >> +            struct buffer *src, *dst;
> >> +            unsigned int x, y;
> >> +
> >> +            for (unsigned int height = 1; height <= 16; height <<= 1) {
> >> +                    for (unsigned int y0 = ZERO; y0 <= (height > 2 ? BELOW : ZERO); y0++) {
> >> +                            for (unsigned int width = 1; width <= 64; width <<= 1) {
> >> +                                    for (unsigned int x0 = ZERO; x0 <= (width > 2 ? BELOW : ZERO); x0++) {
> >> +
> >> +                                            src = buffer_create(&device,
> >> +                                                                width * 16, height * 4);
> >> +                                            dst = buffer_create(&device,
> >> +                                                                width * 16, height * 4);
> >> +
> >> +                                            y = start_at(height, y0);
> >> +                                            for (unsigned int src_tiling = I915_TILING_NONE;
> >> +                                                 src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> >> +                                                 src_tiling++) {
> >> +                                                    buffer_set_tiling(&device, src, src_tiling);
> >> +
> >> +                                                    x = start_at(width, x0);
> >> +                                                    for (unsigned int dst_tiling = I915_TILING_NONE;
> >> +                                                         dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> >> +                                                         dst_tiling++) {
> >> +                                                            buffer_set_tiling(&device, dst, dst_tiling);
> >> +
> >> +                                                            for (enum mode down = CPU; down <= WC; down++) {
> 
> It seems like mode is only used in the buffer_check function, unless I
> missed something. May be instead of looping over mode and doing the same
> blit operation multiple times we can just do mode = (width % 3)?

You are using the blit to set up the dirty state, then inspecting it via
different modes. One of those modes should probably be a mixture of the
others... Since how we flush the dirty state is a big part of the test,
I don't see it being easily reduced... It's about mixing it up and not
missing anything.

The art is all in picking the right parameter space to trick the kernel
& HW into making a mistake and being able to detect it. The real art is
in being thorough, but finding the bugs early! Think of this as only the
start!
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2019-11-06  9:22 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-05 11:15 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
2019-11-05 11:51 ` [igt-dev] ✗ GitLab.Pipeline: warning for igt: Another combinatorial exercise for blits (rev2) Patchwork
2019-11-05 12:04 ` [igt-dev] ✓ Fi.CI.BAT: success " Patchwork
2019-11-05 13:25 ` [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Katarzyna Dec
2019-11-05 23:50   ` Vanshidhar Konda
2019-11-06  9:22     ` Chris Wilson
2019-11-05 21:48 ` [igt-dev] ✗ Fi.CI.IGT: failure for igt: Another combinatorial exercise for blits (rev2) Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2018-07-25 21:38 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
2018-08-01 14:47 ` Katarzyna Dec
2018-08-01 15:10   ` Chris Wilson
2018-08-02 11:05     ` Katarzyna Dec

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox