igt-dev.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
@ 2018-07-25 21:38 Chris Wilson
  2018-07-26  0:19 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Chris Wilson @ 2018-07-25 21:38 UTC (permalink / raw)
  To: intel-gfx; +Cc: igt-dev

The aim of this test is to combine gem_linear_blits, gem_tiled_blits etc
into one test runner that covers investigation into HW alignment issues
as well as driver boundaries (relocs, access, thrashing).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Katarzyna Dec <katarzyna.dec@intel.com>
---
 tests/Makefile.sources |   1 +
 tests/gem_blits.c      | 753 +++++++++++++++++++++++++++++++++++++++++
 tests/meson.build      |   1 +
 3 files changed, 755 insertions(+)
 create mode 100644 tests/gem_blits.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index c84933f1d..564545fb7 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -41,6 +41,7 @@ TESTS_progs = \
 	drv_suspend \
 	gem_bad_reloc \
 	gem_basic \
+	gem_blits \
 	gem_busy \
 	gem_caching \
 	gem_close \
diff --git a/tests/gem_blits.c b/tests/gem_blits.c
new file mode 100644
index 000000000..44da775a4
--- /dev/null
+++ b/tests/gem_blits.c
@@ -0,0 +1,753 @@
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_x86.h"
+
+#define MI_FLUSH_DW (0x26 << 23)
+
+#define BCS_SWCTRL 0x22200
+#define BCS_SRC_Y (1 << 0)
+#define BCS_DST_Y (1 << 1)
+
+struct device {
+	int fd;
+	int gen;
+	int pciid;
+	int llc;
+};
+
+struct buffer {
+	uint32_t handle;
+	uint16_t width;
+	uint16_t height;
+	uint16_t stride;
+	uint32_t size;
+	unsigned int tiling;
+	unsigned int caching;
+	uint64_t gtt_offset;
+	uint32_t model[] __attribute__((aligned(16)));
+};
+
+static int fls(uint64_t x)
+{
+	int t;
+
+	for (t = 0; x >> t; t++)
+		;
+
+	return t;
+}
+
+static unsigned int
+get_tiling_stride(const struct device *device,
+		  unsigned int width, unsigned int tiling)
+{
+	unsigned int stride = 4u * width;
+
+	if (tiling) {
+		if (device->gen < 3)
+			stride = ALIGN(stride, 128);
+		else if (device->gen < 4 || tiling == I915_TILING_X)
+			stride = ALIGN(stride, 512);
+		else
+			stride = ALIGN(stride, 128);
+		if (device->gen < 4)
+			stride = 1 << fls(stride - 1);
+	} else {
+		if (device->gen >= 8)
+			stride = ALIGN(stride, 64);
+	}
+
+	igt_assert(stride < UINT16_MAX && stride >= 4*width);
+	return stride;
+}
+
+static unsigned int
+get_tiling_height(const struct device *device,
+		  unsigned int height, unsigned int tiling)
+{
+	if (!tiling)
+		return height;
+
+	if (device->gen < 3)
+		return ALIGN(height, 16);
+	else if (device->gen < 4 || tiling == I915_TILING_X)
+		return ALIGN(height, 8);
+	else
+		return ALIGN(height, 32);
+}
+
+static struct buffer *buffer_create(const struct device *device,
+				    unsigned int width,
+				    unsigned int height)
+{
+	struct buffer *buffer;
+
+	igt_assert(width && height);
+
+	buffer = calloc(1, sizeof(*buffer) + 4u * width * height);
+	if (!buffer)
+		return NULL;
+
+	buffer->width = width;
+	buffer->height = height;
+
+	buffer->stride = get_tiling_stride(device, width, I915_TILING_NONE);
+	buffer->size = ALIGN(buffer->stride * height, 4096);
+	buffer->handle = gem_create(device->fd, buffer->size);
+	buffer->caching = device->llc;
+
+	for (int y = 0; y < height; y++) {
+		uint32_t *row = buffer->model + y * width;
+
+		for (int x = 0; x < width; x++)
+			row[x] = (y << 16 | x) ^ buffer->handle;
+
+		gem_write(device->fd,
+			  buffer->handle, 4u * y * width,
+			  row, 4u * width);
+	}
+
+	return buffer;
+}
+
+static void buffer_set_tiling(const struct device *device,
+			      struct buffer *buffer,
+			      unsigned int tiling)
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t stride, size, pitch;
+	uint32_t *batch;
+	int i;
+
+	if (buffer->tiling == tiling)
+		return;
+
+	stride = get_tiling_stride(device, buffer->width, tiling);
+	size = stride * get_tiling_height(device, buffer->height, tiling);
+	size = ALIGN(size, 4096);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = gem_create(device->fd, size);
+	if (tiling) {
+		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+		gem_set_tiling(device->fd, obj[0].handle, tiling, stride);
+	}
+
+	obj[1].handle = buffer->handle;
+	obj[1].offset = buffer->gtt_offset;
+	if (buffer->tiling)
+		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(memset(reloc, 0, sizeof(reloc)));
+	obj[2].relocation_count = 2;
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	i = 0;
+
+	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (buffer->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		if (tiling == I915_TILING_Y)
+			mask |= BCS_DST_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	batch[i] |= 6 + 2 * has_64b_reloc;
+	if (device->gen >= 4 && buffer->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+	if (device->gen >= 4 && tiling)
+		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
+	i++;
+
+	pitch = stride;
+	if (device->gen >= 4 && tiling)
+		pitch /= 4;
+	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
+	batch[i++] = 0;
+	batch[i++] = buffer->height << 16 | buffer->width;
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = 0;
+	pitch = buffer->stride;
+	if (device->gen >= 4 && buffer->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+
+	gem_close(device->fd, obj[2].handle);
+	gem_close(device->fd, obj[1].handle);
+
+	buffer->gtt_offset = obj[0].offset;
+	buffer->handle = obj[0].handle;
+
+	buffer->tiling = tiling;
+	buffer->stride = stride;
+	buffer->size = size;
+}
+
+enum mode {
+	CPU,
+	PRW,
+	GTT,
+	WC,
+};
+
+static void blit_to_linear(const struct device *device,
+			   const struct buffer *buffer,
+			   void *linear)
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t *batch;
+	uint32_t pitch;
+	int i = 0;
+
+	igt_assert(buffer->tiling);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	memset(obj, 0, sizeof(obj));
+	gem_userptr(device->fd, linear, buffer->size, 0, 0, &obj[0].handle);
+	obj[1].handle = buffer->handle;
+	obj[1].offset = buffer->gtt_offset;
+	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	memset(reloc, 0, sizeof(reloc));
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(reloc);
+	obj[2].relocation_count = ARRAY_SIZE(reloc);
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	if (buffer->tiling >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (buffer->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	if (device->gen >= 4 && buffer->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+
+	batch[i++] |= 6 + 2 * has_64b_reloc;
+
+	batch[i++] = 3 << 24 | 0xcc << 16 | buffer->stride;
+	batch[i++] = 0;
+	batch[i++] = buffer->height << 16 | buffer->width;
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = 0;
+	pitch = buffer->stride;
+	if (device->gen >= 4 && buffer->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	if (buffer->tiling >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+	gem_close(device->fd, obj[2].handle);
+
+	gem_sync(device->fd, obj[0].handle);
+	gem_close(device->fd, obj[0].handle);
+}
+
+static void *download(const struct device *device,
+		      const struct buffer *buffer,
+		      enum mode mode)
+{
+	void *linear, *src;
+
+	igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
+
+	switch (mode) {
+	case CPU:
+		if (buffer->tiling) {
+			if (buffer->caching && !device->llc) {
+				mode = GTT;
+				break;
+			}
+			if (device->gen < 3) {
+				mode = GTT;
+				break;
+			}
+
+			blit_to_linear(device, buffer, linear);
+			return linear;
+		}
+		break;
+
+	case PRW:
+	case WC:
+		if (!buffer->tiling)
+			break;
+
+	default:
+		mode = GTT;
+		break;
+	}
+
+	switch (mode) {
+	case CPU:
+		src = gem_mmap__cpu(device->fd, buffer->handle,
+				    0, buffer->size,
+				    PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_CPU, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case WC:
+		src = gem_mmap__wc(device->fd, buffer->handle,
+				   0, buffer->size,
+				   PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_WC, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case GTT:
+		src = gem_mmap__gtt(device->fd, buffer->handle,
+				   buffer->size,
+				   PROT_READ);
+
+		gem_set_domain(device->fd, buffer->handle,
+			       I915_GEM_DOMAIN_GTT, 0);
+		igt_memcpy_from_wc(linear, src, buffer->size);
+		munmap(src, buffer->size);
+		break;
+
+	case PRW:
+		gem_read(device->fd, buffer->handle, 0, linear, buffer->size);
+		break;
+	}
+
+	return linear;
+}
+
+static bool buffer_check(const struct device *device,
+			 const struct buffer *buffer,
+			 enum mode mode)
+{
+	unsigned int num_errors = 0;
+	uint32_t *linear;
+
+	linear = download(device, buffer, mode);
+	igt_assert(linear);
+
+	for (int y = 0; y < buffer->height; y++) {
+		const uint32_t *model = buffer->model + y * buffer->width;
+		const uint32_t *row =
+			linear + y * buffer->stride / sizeof(uint32_t);
+
+		for (int x = 0; x < buffer->width; x++) {
+			if (row[x] != model[x] && num_errors++ < 5) {
+				igt_warn("buffer handle=%d mismatch at (%d, %d): expected %08x, found %08x\n",
+					 buffer->handle,
+					 x, y, model[x], row[x]);
+			}
+		}
+	}
+
+	free(linear);
+
+	return num_errors == 0;
+}
+
+static void buffer_free(const struct device *device, struct buffer *buffer)
+{
+	igt_assert(buffer_check(device, buffer, GTT));
+	gem_close(device->fd, buffer->handle);
+	free(buffer);
+}
+
+static void memcpy_blt(const void *src, void *dst,
+		       uint32_t src_stride, uint32_t dst_stride,
+		       uint16_t src_x, uint16_t src_y,
+		       uint16_t dst_x, uint16_t dst_y,
+		       uint16_t width, uint16_t height)
+{
+	const uint8_t *src_bytes;
+	uint8_t *dst_bytes;
+	int byte_width;
+
+	src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * 4;
+	dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * 4;
+
+	byte_width = width * 4;
+	if (byte_width == src_stride && byte_width == dst_stride) {
+		byte_width *= height;
+		height = 1;
+	}
+
+	switch (byte_width) {
+	case 4:
+		do {
+			*(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes;
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+
+	case 8:
+		do {
+			*(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes;
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+	case 16:
+		do {
+			((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
+			((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+
+	default:
+		do {
+			memcpy(dst_bytes, src_bytes, byte_width);
+			src_bytes += src_stride;
+			dst_bytes += dst_stride;
+		} while (--height);
+		break;
+	}
+}
+
+static void
+blit(const struct device *device,
+     struct buffer *src, uint16_t src_x, uint16_t src_y,
+     struct buffer *dst, uint16_t dst_x, uint16_t dst_y,
+     uint16_t width, uint16_t height)
+
+{
+	struct drm_i915_gem_exec_object2 obj[3];
+	struct drm_i915_gem_relocation_entry reloc[2];
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const bool has_64b_reloc = device->gen >= 8;
+	uint32_t *batch;
+	uint32_t pitch;
+	int i = 0;
+
+	if (src_x < 0) {
+		width += src_x;
+		dst_x -= src_x;
+		src_x = 0;
+	}
+	if (src_y < 0) {
+		height += src_y;
+		dst_y  -= src_y;
+		src_y = 0;
+	}
+
+	if (dst_x < 0) {
+		width += dst_x;
+		src_x -= dst_x;
+		dst_x = 0;
+	}
+	if (dst_y < 0) {
+		height += dst_y;
+		src_y  -= dst_y;
+		dst_y = 0;
+	}
+
+	if (src_x + width > src->width)
+		width = src->width - src_x;
+	if (dst_x + width > dst->width)
+		width = dst->width - dst_x;
+
+	if (src_y + height > src->height)
+		height = src->height - src_y;
+	if (dst_y + height > dst->height)
+		height = dst->height - dst_y;
+
+	if (dst->caching) {
+		igt_assert(device->gen >= 3);
+		igt_assert(device->llc || !src->caching);
+	}
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = ARRAY_SIZE(obj);
+	if (device->gen >= 6)
+		execbuf.flags = I915_EXEC_BLT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = dst->handle;
+	obj[0].offset = dst->gtt_offset;
+	if (dst->tiling)
+		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	obj[1].handle = src->handle;
+	obj[1].offset = src->gtt_offset;
+	if (src->tiling)
+		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
+
+	memset(reloc, 0, sizeof(reloc));
+	obj[2].handle = gem_create(device->fd, 4096);
+	obj[2].relocs_ptr = to_user_pointer(reloc);
+	obj[2].relocation_count = ARRAY_SIZE(reloc);
+	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
+
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		unsigned int mask;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+
+		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
+		if (src->tiling == I915_TILING_Y)
+			mask |= BCS_SRC_Y;
+		if (dst->tiling == I915_TILING_Y)
+			mask |= BCS_DST_Y;
+		batch[i++] = mask;
+	}
+
+	batch[i] = (XY_SRC_COPY_BLT_CMD |
+		    XY_SRC_COPY_BLT_WRITE_ALPHA |
+		    XY_SRC_COPY_BLT_WRITE_RGB);
+	batch[i] |= 6 + 2 * has_64b_reloc;
+	if (device->gen >= 4 && src->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
+	if (device->gen >= 4 && dst->tiling)
+		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
+	i++;
+
+	pitch = dst->stride;
+	if (device->gen >= 4 && dst->tiling)
+		pitch /= 4;
+	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
+
+	batch[i++] = dst_y << 16 | dst_x;
+	batch[i++] = (height + dst_y) << 16 | (width + dst_x);
+	reloc[0].target_handle = obj[0].handle;
+	reloc[0].presumed_offset = obj[0].offset;
+	reloc[0].offset = sizeof(*batch) * i;
+	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[0].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[0].offset >> 32;
+
+	batch[i++] = src_y << 16 | src_x;
+	pitch = src->stride;
+	if (device->gen >= 4 && src->tiling)
+		pitch /= 4;
+	batch[i++] = pitch;
+	reloc[1].target_handle = obj[1].handle;
+	reloc[1].presumed_offset = obj[1].offset;
+	reloc[1].offset = sizeof(*batch) * i;
+	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+	batch[i++] = obj[1].offset;
+	if (has_64b_reloc)
+		batch[i++] = obj[1].offset >> 32;
+
+	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
+		igt_assert(device->gen >= 6);
+		batch[i++] = MI_FLUSH_DW | 2;
+		batch[i++] = 0;
+		batch[i++] = 0;
+		batch[i++] = 0;
+
+		batch[i++] = MI_LOAD_REGISTER_IMM;
+		batch[i++] = BCS_SWCTRL;
+		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
+	}
+
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, 4096);
+
+	gem_execbuf(device->fd, &execbuf);
+	gem_close(device->fd, obj[2].handle);
+
+	dst->gtt_offset = obj[0].offset;
+	src->gtt_offset = obj[1].offset;
+
+	memcpy_blt(src->model, dst->model,
+		   4u * src->width, 4u * dst->width,
+		   src_x, src_y,
+		   dst_x, dst_y,
+		   width, height);
+}
+
+igt_main
+{
+	struct device device;
+
+	igt_fixture {
+		device.fd = drm_open_driver_render(DRIVER_INTEL);
+		igt_require_gem(device.fd);
+
+		device.pciid = intel_get_drm_devid(device.fd);
+		device.gen = intel_gen(device.pciid);
+		device.llc = gem_has_llc(device.fd);
+	}
+
+	igt_subtest("basic") {
+		struct buffer *src, *dst;
+		unsigned int x, y;
+
+		for (unsigned int height = 1; height <= 16; height <<= 1) {
+			for (unsigned int width = 1; width <= 64; width <<= 1) {
+				src = buffer_create(&device,
+						    width * 16, height * 4);
+				dst = buffer_create(&device,
+						    width * 16, height * 4);
+
+				y = 0;
+				for (unsigned int src_tiling = I915_TILING_NONE;
+				     src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
+				     src_tiling++) {
+					buffer_set_tiling(&device, src, src_tiling);
+
+					x = 0;
+					for (unsigned int dst_tiling = I915_TILING_NONE;
+					     dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
+					     dst_tiling++) {
+						buffer_set_tiling(&device, dst, dst_tiling);
+
+						for (enum mode down = CPU; down <= WC; down++) {
+							igt_debug("Testing src_tiling=%d, dst_tiling=%d, down=%d at (%d, %d) x (%d, %d)\n",
+								  src_tiling,
+								  dst_tiling,
+								  down, x, y,
+								  width, height);
+
+							igt_assert(x + width <= dst->width);
+							igt_assert(y + height <= dst->height);
+
+							blit(&device,
+							     src, x, y,
+							     dst, x, y,
+							     width, height);
+							igt_assert(buffer_check(&device, dst, down));
+
+							x += width;
+						}
+					}
+
+					y += height;
+				}
+
+				buffer_free(&device, dst);
+				buffer_free(&device, src);
+			}
+		}
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 32c2156c6..becadfa8d 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -18,6 +18,7 @@ test_progs = [
 	'drv_suspend',
 	'gem_bad_reloc',
 	'gem_basic',
+	'gem_blits',
 	'gem_busy',
 	'gem_caching',
 	'gem_close',
-- 
2.18.0

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [igt-dev] ✓ Fi.CI.BAT: success for igt: Another combinatorial exercise for blits
  2018-07-25 21:38 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
@ 2018-07-26  0:19 ` Patchwork
  2018-07-26  3:09 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
  2018-08-01 14:47 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
  2 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2018-07-26  0:19 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: igt: Another combinatorial exercise for blits
URL   : https://patchwork.freedesktop.org/series/47249/
State : success

== Summary ==

= CI Bug Log - changes from CI_DRM_4547 -> IGTPW_1646 =

== Summary - SUCCESS ==

  No regressions found.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47249/revisions/1/mbox/

== Known issues ==

  Here are the changes found in IGTPW_1646 that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@drv_module_reload@basic-no-display:
      fi-cnl-psr:         NOTRUN -> DMESG-WARN (fdo#105395) +2

    igt@gem_exec_suspend@basic-s3:
      {fi-skl-caroline}:  NOTRUN -> INCOMPLETE (fdo#104108)

    igt@gem_exec_suspend@basic-s4-devices:
      fi-kbl-7500u:       PASS -> DMESG-WARN (fdo#105128, fdo#107139)

    igt@gem_sync@basic-each:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#105719)

    igt@kms_flip@basic-flip-vs-dpms:
      fi-glk-j4005:       PASS -> DMESG-WARN (fdo#106000)

    {igt@kms_psr@cursor_plane_move}:
      fi-cnl-psr:         NOTRUN -> DMESG-FAIL (fdo#107372)

    {igt@kms_psr@primary_mmap_gtt}:
      fi-cnl-psr:         NOTRUN -> DMESG-WARN (fdo#107372) +1

    igt@prime_vgem@basic-fence-flip:
      fi-ilk-650:         PASS -> FAIL (fdo#104008)

    
    ==== Possible fixes ====

    igt@gem_exec_create@basic:
      fi-glk-j4005:       DMESG-WARN (fdo#106745) -> PASS

    igt@kms_flip@basic-flip-vs-modeset:
      fi-glk-j4005:       DMESG-WARN (fdo#106000) -> PASS

    
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  fdo#104008 https://bugs.freedesktop.org/show_bug.cgi?id=104008
  fdo#104108 https://bugs.freedesktop.org/show_bug.cgi?id=104108
  fdo#105128 https://bugs.freedesktop.org/show_bug.cgi?id=105128
  fdo#105395 https://bugs.freedesktop.org/show_bug.cgi?id=105395
  fdo#105719 https://bugs.freedesktop.org/show_bug.cgi?id=105719
  fdo#106000 https://bugs.freedesktop.org/show_bug.cgi?id=106000
  fdo#106745 https://bugs.freedesktop.org/show_bug.cgi?id=106745
  fdo#107139 https://bugs.freedesktop.org/show_bug.cgi?id=107139
  fdo#107372 https://bugs.freedesktop.org/show_bug.cgi?id=107372


== Participating hosts (48 -> 42) ==

  Additional (3): fi-byt-j1900 fi-skl-caroline fi-cnl-psr 
  Missing    (9): fi-ilk-m540 fi-hsw-4200u fi-skl-guc fi-byt-squawks fi-bsw-cyan fi-cfl-guc fi-kbl-guc fi-ctg-p8600 fi-byt-clapper 


== Build changes ==

    * IGT: IGT_4575 -> IGTPW_1646

  CI_DRM_4547: 0a7ab192a697e951b2404f3c1ce42c5fa74f9ed1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_1646: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1646/
  IGT_4575: fe908a01012c9daafafb3410b9407725ca9d4f21 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools



== Testlist changes ==

+igt@gem_blits@basic

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1646/issues.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [igt-dev] ✓ Fi.CI.IGT: success for igt: Another combinatorial exercise for blits
  2018-07-25 21:38 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
  2018-07-26  0:19 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
@ 2018-07-26  3:09 ` Patchwork
  2018-08-01 14:47 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
  2 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2018-07-26  3:09 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

== Series Details ==

Series: igt: Another combinatorial exercise for blits
URL   : https://patchwork.freedesktop.org/series/47249/
State : success

== Summary ==

= CI Bug Log - changes from IGT_4575_full -> IGTPW_1646_full =

== Summary - WARNING ==

  Minor unknown changes coming with IGTPW_1646_full need to be verified
  manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in IGTPW_1646_full, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://patchwork.freedesktop.org/api/1.0/series/47249/revisions/1/mbox/

== Possible new issues ==

  Here are the unknown changes that may have been introduced in IGTPW_1646_full:

  === IGT changes ===

    ==== Warnings ====

    igt@gem_mocs_settings@mocs-rc6-bsd1:
      shard-kbl:          SKIP -> PASS +2

    igt@perf_pmu@rc6-runtime-pm:
      shard-hsw:          SKIP -> PASS

    igt@pm_rc6_residency@rc6-accuracy:
      shard-kbl:          PASS -> SKIP +1

    
== Known issues ==

  Here are the changes found in IGTPW_1646_full that come from known issues:

  === IGT changes ===

    ==== Issues hit ====

    igt@kms_cursor_legacy@all-pipes-torture-bo:
      shard-apl:          PASS -> DMESG-WARN (fdo#107122)
      shard-snb:          PASS -> DMESG-WARN (fdo#107122)

    igt@kms_cursor_legacy@cursor-vs-flip-toggle:
      shard-hsw:          PASS -> FAIL (fdo#103355)

    igt@kms_frontbuffer_tracking@fbc-rgb565-draw-mmap-gtt:
      shard-snb:          PASS -> INCOMPLETE (fdo#105411)

    igt@kms_plane_multiple@atomic-pipe-a-tiling-x:
      shard-snb:          PASS -> FAIL (fdo#103166)

    igt@kms_rotation_crc@sprite-rotation-180:
      shard-snb:          PASS -> FAIL (fdo#103925)

    igt@kms_setmode@basic:
      shard-kbl:          PASS -> FAIL (fdo#99912)

    igt@kms_universal_plane@cursor-fb-leak-pipe-b:
      shard-apl:          PASS -> FAIL (fdo#107241)

    igt@kms_vblank@pipe-a-ts-continuation-suspend:
      shard-kbl:          PASS -> INCOMPLETE (fdo#103665)

    igt@pm_rpm@dpms-lpsp:
      shard-glk:          SKIP -> FAIL (fdo#106539)

    igt@pm_rpm@fences:
      shard-glk:          PASS -> FAIL (fdo#106539) +1

    
    ==== Possible fixes ====

    igt@drv_suspend@shrink:
      shard-apl:          INCOMPLETE (fdo#106886, fdo#103927) -> PASS

    igt@kms_flip@wf_vblank-ts-check-interruptible:
      shard-glk:          FAIL (fdo#100368) -> PASS

    igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-mmap-wc:
      shard-snb:          INCOMPLETE (fdo#105411) -> PASS

    igt@pm_rpm@debugfs-forcewake-user:
      shard-hsw:          FAIL (fdo#106539) -> PASS +4

    igt@pm_rpm@drm-resources-equal:
      shard-kbl:          FAIL -> PASS
      shard-glk:          FAIL (fdo#106539) -> PASS
      shard-apl:          FAIL (fdo#106539) -> PASS

    
  fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368
  fdo#103166 https://bugs.freedesktop.org/show_bug.cgi?id=103166
  fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355
  fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665
  fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925
  fdo#103927 https://bugs.freedesktop.org/show_bug.cgi?id=103927
  fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411
  fdo#106539 https://bugs.freedesktop.org/show_bug.cgi?id=106539
  fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886
  fdo#107122 https://bugs.freedesktop.org/show_bug.cgi?id=107122
  fdo#107241 https://bugs.freedesktop.org/show_bug.cgi?id=107241
  fdo#99912 https://bugs.freedesktop.org/show_bug.cgi?id=99912


== Participating hosts (5 -> 5) ==

  No changes in participating hosts


== Build changes ==

    * IGT: IGT_4575 -> IGTPW_1646
    * Linux: CI_DRM_4541 -> CI_DRM_4547

  CI_DRM_4541: 3e18e4c6c008597f4ff952d7a3457bd310ce945c @ git://anongit.freedesktop.org/gfx-ci/linux
  CI_DRM_4547: 0a7ab192a697e951b2404f3c1ce42c5fa74f9ed1 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGTPW_1646: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1646/
  IGT_4575: fe908a01012c9daafafb3410b9407725ca9d4f21 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1646/shards.html
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2018-07-25 21:38 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
  2018-07-26  0:19 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
  2018-07-26  3:09 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
@ 2018-08-01 14:47 ` Katarzyna Dec
  2018-08-01 15:10   ` Chris Wilson
  2 siblings, 1 reply; 6+ messages in thread
From: Katarzyna Dec @ 2018-08-01 14:47 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

On Wed, Jul 25, 2018 at 10:38:23PM +0100, Chris Wilson wrote:
> The aim of this test is to combine gem_linear_blits, gem_tiled_blits etc
> into one test runner that covers investigation into HW alignment issues
> as well as driver boundaries (relocs, access, thrashing).
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Katarzyna Dec <katarzyna.dec@intel.com>
> ---
>  tests/Makefile.sources |   1 +
>  tests/gem_blits.c      | 753 +++++++++++++++++++++++++++++++++++++++++
>  tests/meson.build      |   1 +
>  3 files changed, 755 insertions(+)
>  create mode 100644 tests/gem_blits.c
> 
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index c84933f1d..564545fb7 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -41,6 +41,7 @@ TESTS_progs = \
>  	drv_suspend \
>  	gem_bad_reloc \
>  	gem_basic \
> +	gem_blits \
>  	gem_busy \
>  	gem_caching \
>  	gem_close \
> diff --git a/tests/gem_blits.c b/tests/gem_blits.c
> new file mode 100644
> index 000000000..44da775a4
> --- /dev/null
> +++ b/tests/gem_blits.c
> @@ -0,0 +1,753 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "igt.h"
> +#include "igt_x86.h"
> +
> +#define MI_FLUSH_DW (0x26 << 23)
> +
> +#define BCS_SWCTRL 0x22200
> +#define BCS_SRC_Y (1 << 0)
> +#define BCS_DST_Y (1 << 1)
> +
> +struct device {
> +	int fd;
> +	int gen;
> +	int pciid;
> +	int llc;
> +};
> +
> +struct buffer {
> +	uint32_t handle;
> +	uint16_t width;
> +	uint16_t height;
> +	uint16_t stride;
> +	uint32_t size;
> +	unsigned int tiling;
> +	unsigned int caching;
> +	uint64_t gtt_offset;
> +	uint32_t model[] __attribute__((aligned(16)));
> +};
> +
> +static int fls(uint64_t x)
> +{
> +	int t;
> +
> +	for (t = 0; x >> t; t++)
> +		;
> +
> +	return t;
> +}
> +
> +static unsigned int
> +get_tiling_stride(const struct device *device,
> +		  unsigned int width, unsigned int tiling)
> +{
> +	unsigned int stride = 4u * width;
> +
> +	if (tiling) {
> +		if (device->gen < 3)
> +			stride = ALIGN(stride, 128);
> +		else if (device->gen < 4 || tiling == I915_TILING_X)
> +			stride = ALIGN(stride, 512);
> +		else
> +			stride = ALIGN(stride, 128);
> +		if (device->gen < 4)
> +			stride = 1 << fls(stride - 1);
Shouldn't 'else' be last here ^? What about order of 'else if' and 'if'
at the end?
> +	} else {
> +		if (device->gen >= 8)
> +			stride = ALIGN(stride, 64);
> +	}
> +
> +	igt_assert(stride < UINT16_MAX && stride >= 4*width);
> +	return stride;
> +}
> +
> +static unsigned int
> +get_tiling_height(const struct device *device,
> +		  unsigned int height, unsigned int tiling)
> +{
> +	if (!tiling)
> +		return height;
> +
> +	if (device->gen < 3)
> +		return ALIGN(height, 16);
> +	else if (device->gen < 4 || tiling == I915_TILING_X)
> +		return ALIGN(height, 8);
> +	else
> +		return ALIGN(height, 32);
> +}
> +
> +static struct buffer *buffer_create(const struct device *device,
> +				    unsigned int width,
> +				    unsigned int height)
> +{
> +	struct buffer *buffer;
> +
> +	igt_assert(width && height);
> +
> +	buffer = calloc(1, sizeof(*buffer) + 4u * width * height);
> +	if (!buffer)
> +		return NULL;
> +
> +	buffer->width = width;
> +	buffer->height = height;
> +
> +	buffer->stride = get_tiling_stride(device, width, I915_TILING_NONE);
> +	buffer->size = ALIGN(buffer->stride * height, 4096);
> +	buffer->handle = gem_create(device->fd, buffer->size);
> +	buffer->caching = device->llc;
> +
> +	for (int y = 0; y < height; y++) {
> +		uint32_t *row = buffer->model + y * width;
> +
> +		for (int x = 0; x < width; x++)
> +			row[x] = (y << 16 | x) ^ buffer->handle;
> +
> +		gem_write(device->fd,
> +			  buffer->handle, 4u * y * width,
> +			  row, 4u * width);
> +	}
> +
> +	return buffer;
> +}
> +
> +static void buffer_set_tiling(const struct device *device,
> +			      struct buffer *buffer,
> +			      unsigned int tiling)
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t stride, size, pitch;
> +	uint32_t *batch;
> +	int i;
> +
> +	if (buffer->tiling == tiling)
> +		return;
> +
> +	stride = get_tiling_stride(device, buffer->width, tiling);
> +	size = stride * get_tiling_height(device, buffer->height, tiling);
> +	size = ALIGN(size, 4096);
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = gem_create(device->fd, size);
> +	if (tiling) {
> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
> +		gem_set_tiling(device->fd, obj[0].handle, tiling, stride);
> +	}
> +
> +	obj[1].handle = buffer->handle;
> +	obj[1].offset = buffer->gtt_offset;
> +	if (buffer->tiling)
> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(memset(reloc, 0, sizeof(reloc)));
> +	obj[2].relocation_count = 2;
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	i = 0;
> +
> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +

All batch creation staring from here ^ looks misterious. Maybe we can make
buffer_set_tiling and buffer_linear functions more clearer?
It is hard do read and understand.

> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (buffer->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		if (tiling == I915_TILING_Y)
> +			mask |= BCS_DST_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	batch[i] |= 6 + 2 * has_64b_reloc;
> +	if (device->gen >= 4 && buffer->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +	if (device->gen >= 4 && tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
> +	i++;
> +
> +	pitch = stride;
> +	if (device->gen >= 4 && tiling)
> +		pitch /= 4;
> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
> +	batch[i++] = 0;
> +	batch[i++] = buffer->height << 16 | buffer->width;
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = 0;
> +	pitch = buffer->stride;
> +	if (device->gen >= 4 && buffer->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +
> +	gem_close(device->fd, obj[2].handle);
> +	gem_close(device->fd, obj[1].handle);
> +
> +	buffer->gtt_offset = obj[0].offset;
> +	buffer->handle = obj[0].handle;
> +
> +	buffer->tiling = tiling;
> +	buffer->stride = stride;
> +	buffer->size = size;
> +}
> +
> +enum mode {
> +	CPU,
> +	PRW,
> +	GTT,
> +	WC,
> +};
> +
> +static void blit_to_linear(const struct device *device,
> +			   const struct buffer *buffer,
> +			   void *linear)
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t *batch;
> +	uint32_t pitch;
> +	int i = 0;
> +
> +	igt_assert(buffer->tiling);
> +
> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	gem_userptr(device->fd, linear, buffer->size, 0, 0, &obj[0].handle);
> +	obj[1].handle = buffer->handle;
> +	obj[1].offset = buffer->gtt_offset;
> +	obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	memset(reloc, 0, sizeof(reloc));
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(reloc);
> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	if (buffer->tiling >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +
> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (buffer->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	if (device->gen >= 4 && buffer->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +
> +	batch[i++] |= 6 + 2 * has_64b_reloc;
> +
> +	batch[i++] = 3 << 24 | 0xcc << 16 | buffer->stride;
> +	batch[i++] = 0;
> +	batch[i++] = buffer->height << 16 | buffer->width;
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = 0;
> +	pitch = buffer->stride;
> +	if (device->gen >= 4 && buffer->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if (buffer->tiling >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +	gem_close(device->fd, obj[2].handle);
> +
> +	gem_sync(device->fd, obj[0].handle);
> +	gem_close(device->fd, obj[0].handle);
> +}
> +
> +static void *download(const struct device *device,
> +		      const struct buffer *buffer,
> +		      enum mode mode)
> +{
> +	void *linear, *src;
> +
> +	igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
> +
> +	switch (mode) {
> +	case CPU:
> +		if (buffer->tiling) {
> +			if (buffer->caching && !device->llc) {
> +				mode = GTT;
> +				break;
> +			}
> +			if (device->gen < 3) {
> +				mode = GTT;
> +				break;
> +			}
Why do we need 2 if-s here ^? Both are setting the same mode.
> +
> +			blit_to_linear(device, buffer, linear);
> +			return linear;
> +		}
> +		break;
> +
> +	case PRW:
> +	case WC:
> +		if (!buffer->tiling)
> +			break;
> +
> +	default:
> +		mode = GTT;
> +		break;
> +	}
> +
> +	switch (mode) {
> +	case CPU:
> +		src = gem_mmap__cpu(device->fd, buffer->handle,
> +				    0, buffer->size,
> +				    PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_CPU, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case WC:
> +		src = gem_mmap__wc(device->fd, buffer->handle,
> +				   0, buffer->size,
> +				   PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_WC, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case GTT:
> +		src = gem_mmap__gtt(device->fd, buffer->handle,
> +				   buffer->size,
> +				   PROT_READ);
> +
> +		gem_set_domain(device->fd, buffer->handle,
> +			       I915_GEM_DOMAIN_GTT, 0);
> +		igt_memcpy_from_wc(linear, src, buffer->size);
> +		munmap(src, buffer->size);
> +		break;
> +
> +	case PRW:
> +		gem_read(device->fd, buffer->handle, 0, linear, buffer->size);
> +		break;
> +	}
> +
> +	return linear;
> +}
> +
> +static bool buffer_check(const struct device *device,
> +			 const struct buffer *buffer,
> +			 enum mode mode)
> +{
> +	unsigned int num_errors = 0;
> +	uint32_t *linear;
> +
> +	linear = download(device, buffer, mode);
> +	igt_assert(linear);
> +
> +	for (int y = 0; y < buffer->height; y++) {
> +		const uint32_t *model = buffer->model + y * buffer->width;
> +		const uint32_t *row =
> +			linear + y * buffer->stride / sizeof(uint32_t);
> +
> +		for (int x = 0; x < buffer->width; x++) {
> +			if (row[x] != model[x] && num_errors++ < 5) {
> +				igt_warn("buffer handle=%d mismatch at (%d, %d): expected %08x, found %08x\n",
> +					 buffer->handle,
> +					 x, y, model[x], row[x]);
> +			}
> +		}
> +	}
> +
> +	free(linear);
> +
> +	return num_errors == 0;
> +}
> +
> +static void buffer_free(const struct device *device, struct buffer *buffer)
> +{
> +	igt_assert(buffer_check(device, buffer, GTT));
> +	gem_close(device->fd, buffer->handle);
> +	free(buffer);
> +}
> +
> +static void memcpy_blt(const void *src, void *dst,
> +		       uint32_t src_stride, uint32_t dst_stride,
> +		       uint16_t src_x, uint16_t src_y,
> +		       uint16_t dst_x, uint16_t dst_y,
> +		       uint16_t width, uint16_t height)
> +{
> +	const uint8_t *src_bytes;
> +	uint8_t *dst_bytes;
> +	int byte_width;
> +
> +	src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * 4;
> +	dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * 4;
> +
> +	byte_width = width * 4;
> +	if (byte_width == src_stride && byte_width == dst_stride) {
> +		byte_width *= height;
> +		height = 1;
> +	}
> +
> +	switch (byte_width) {
> +	case 4:
> +		do {
> +			*(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes;
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +
> +	case 8:
> +		do {
> +			*(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes;
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +	case 16:
> +		do {
> +			((uint64_t *)dst_bytes)[0] = ((const uint64_t *)src_bytes)[0];
> +			((uint64_t *)dst_bytes)[1] = ((const uint64_t *)src_bytes)[1];
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +
> +	default:
> +		do {
> +			memcpy(dst_bytes, src_bytes, byte_width);
> +			src_bytes += src_stride;
> +			dst_bytes += dst_stride;
> +		} while (--height);
> +		break;
> +	}
> +}
> +
> +static void
> +blit(const struct device *device,
> +     struct buffer *src, uint16_t src_x, uint16_t src_y,
> +     struct buffer *dst, uint16_t dst_x, uint16_t dst_y,
> +     uint16_t width, uint16_t height)
> +
> +{
> +	struct drm_i915_gem_exec_object2 obj[3];
> +	struct drm_i915_gem_relocation_entry reloc[2];
> +	struct drm_i915_gem_execbuffer2 execbuf;
> +	const bool has_64b_reloc = device->gen >= 8;
> +	uint32_t *batch;
> +	uint32_t pitch;
> +	int i = 0;
> +
> +	if (src_x < 0) {
> +		width += src_x;
> +		dst_x -= src_x;
> +		src_x = 0;
> +	}
> +	if (src_y < 0) {
> +		height += src_y;
> +		dst_y  -= src_y;
> +		src_y = 0;
> +	}
> +
> +	if (dst_x < 0) {
> +		width += dst_x;
> +		src_x -= dst_x;
> +		dst_x = 0;
> +	}
> +	if (dst_y < 0) {
> +		height += dst_y;
> +		src_y  -= dst_y;
> +		dst_y = 0;
> +	}
> +
> +	if (src_x + width > src->width)
> +		width = src->width - src_x;
> +	if (dst_x + width > dst->width)
> +		width = dst->width - dst_x;
> +
> +	if (src_y + height > src->height)
> +		height = src->height - src_y;
> +	if (dst_y + height > dst->height)
> +		height = dst->height - dst_y;
> +
> +	if (dst->caching) {
> +		igt_assert(device->gen >= 3);
> +		igt_assert(device->llc || !src->caching);
> +	}
> +
How did you calculated this values above ^ ?

> +	memset(&execbuf, 0, sizeof(execbuf));
> +	execbuf.buffers_ptr = to_user_pointer(obj);
> +	execbuf.buffer_count = ARRAY_SIZE(obj);
> +	if (device->gen >= 6)
> +		execbuf.flags = I915_EXEC_BLT;
> +
> +	memset(obj, 0, sizeof(obj));
> +	obj[0].handle = dst->handle;
> +	obj[0].offset = dst->gtt_offset;
> +	if (dst->tiling)
> +		obj[0].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	obj[1].handle = src->handle;
> +	obj[1].offset = src->gtt_offset;
> +	if (src->tiling)
> +		obj[1].flags = EXEC_OBJECT_NEEDS_FENCE;
> +
> +	memset(reloc, 0, sizeof(reloc));
> +	obj[2].handle = gem_create(device->fd, 4096);
> +	obj[2].relocs_ptr = to_user_pointer(reloc);
> +	obj[2].relocation_count = ARRAY_SIZE(reloc);
> +	batch = gem_mmap__cpu(device->fd, obj[2].handle, 0, 4096, PROT_WRITE);
> +
> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
> +		unsigned int mask;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +
> +		mask = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +		if (src->tiling == I915_TILING_Y)
> +			mask |= BCS_SRC_Y;
> +		if (dst->tiling == I915_TILING_Y)
> +			mask |= BCS_DST_Y;
> +		batch[i++] = mask;
> +	}
> +
> +	batch[i] = (XY_SRC_COPY_BLT_CMD |
> +		    XY_SRC_COPY_BLT_WRITE_ALPHA |
> +		    XY_SRC_COPY_BLT_WRITE_RGB);
> +	batch[i] |= 6 + 2 * has_64b_reloc;
> +	if (device->gen >= 4 && src->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_SRC_TILED;
> +	if (device->gen >= 4 && dst->tiling)
> +		batch[i] |= XY_SRC_COPY_BLT_DST_TILED;
> +	i++;
> +
> +	pitch = dst->stride;
> +	if (device->gen >= 4 && dst->tiling)
> +		pitch /= 4;
> +	batch[i++] = 3 << 24 | 0xcc << 16 | pitch;
> +
> +	batch[i++] = dst_y << 16 | dst_x;
> +	batch[i++] = (height + dst_y) << 16 | (width + dst_x);
> +	reloc[0].target_handle = obj[0].handle;
> +	reloc[0].presumed_offset = obj[0].offset;
> +	reloc[0].offset = sizeof(*batch) * i;
> +	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[0].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[0].offset >> 32;
> +
> +	batch[i++] = src_y << 16 | src_x;
> +	pitch = src->stride;
> +	if (device->gen >= 4 && src->tiling)
> +		pitch /= 4;
> +	batch[i++] = pitch;
> +	reloc[1].target_handle = obj[1].handle;
> +	reloc[1].presumed_offset = obj[1].offset;
> +	reloc[1].offset = sizeof(*batch) * i;
> +	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +	batch[i++] = obj[1].offset;
> +	if (has_64b_reloc)
> +		batch[i++] = obj[1].offset >> 32;
> +
> +	if ((src->tiling | dst->tiling) >= I915_TILING_Y) {
> +		igt_assert(device->gen >= 6);
> +		batch[i++] = MI_FLUSH_DW | 2;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +		batch[i++] = 0;
> +
> +		batch[i++] = MI_LOAD_REGISTER_IMM;
> +		batch[i++] = BCS_SWCTRL;
> +		batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16;
> +	}
> +
> +	batch[i++] = MI_BATCH_BUFFER_END;
> +	munmap(batch, 4096);
> +
> +	gem_execbuf(device->fd, &execbuf);
> +	gem_close(device->fd, obj[2].handle);
> +
> +	dst->gtt_offset = obj[0].offset;
> +	src->gtt_offset = obj[1].offset;
> +
> +	memcpy_blt(src->model, dst->model,
> +		   4u * src->width, 4u * dst->width,
> +		   src_x, src_y,
> +		   dst_x, dst_y,
> +		   width, height);
> +}
> +
> +igt_main
> +{
> +	struct device device;
> +
> +	igt_fixture {
> +		device.fd = drm_open_driver_render(DRIVER_INTEL);
> +		igt_require_gem(device.fd);
> +
> +		device.pciid = intel_get_drm_devid(device.fd);
> +		device.gen = intel_gen(device.pciid);
> +		device.llc = gem_has_llc(device.fd);
> +	}
> +
> +	igt_subtest("basic") {
> +		struct buffer *src, *dst;
> +		unsigned int x, y;
> +
> +		for (unsigned int height = 1; height <= 16; height <<= 1) {
> +			for (unsigned int width = 1; width <= 64; width <<= 1) {
> +				src = buffer_create(&device,
> +						    width * 16, height * 4);
> +				dst = buffer_create(&device,
> +						    width * 16, height * 4);
> +
> +				y = 0;
> +				for (unsigned int src_tiling = I915_TILING_NONE;
> +				     src_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> +				     src_tiling++) {
> +					buffer_set_tiling(&device, src, src_tiling);
> +
> +					x = 0;
> +					for (unsigned int dst_tiling = I915_TILING_NONE;
> +					     dst_tiling <= (device.gen >= 6 ? I915_TILING_Y : I915_TILING_X);
> +					     dst_tiling++) {
> +						buffer_set_tiling(&device, dst, dst_tiling);
> +
> +						for (enum mode down = CPU; down <= WC; down++) {
> +							igt_debug("Testing src_tiling=%d, dst_tiling=%d, down=%d at (%d, %d) x (%d, %d)\n",
> +								  src_tiling,
> +								  dst_tiling,
> +								  down, x, y,
> +								  width, height);
> +
> +							igt_assert(x + width <= dst->width);
> +							igt_assert(y + height <= dst->height);
> +
> +							blit(&device,
> +							     src, x, y,
> +							     dst, x, y,
> +							     width, height);
> +							igt_assert(buffer_check(&device, dst, down));
> +
> +							x += width;
> +						}
> +					}
> +
> +					y += height;
> +				}
> +
> +				buffer_free(&device, dst);
> +				buffer_free(&device, src);
> +			}
> +		}
> +	}
> +}

Whole test with all helper functions seems to be not clear enough. 
I would prefer to have more comments of what is going on in the code,
especially in creating batches.

General question - does this test gives additional coverge or will be
a substitue for some existing?

Kasia :)

> diff --git a/tests/meson.build b/tests/meson.build
> index 32c2156c6..becadfa8d 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -18,6 +18,7 @@ test_progs = [
>  	'drv_suspend',
>  	'gem_bad_reloc',
>  	'gem_basic',
> +	'gem_blits',
>  	'gem_busy',
>  	'gem_caching',
>  	'gem_close',
> -- 
> 2.18.0
> 
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2018-08-01 14:47 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
@ 2018-08-01 15:10   ` Chris Wilson
  2018-08-02 11:05     ` Katarzyna Dec
  0 siblings, 1 reply; 6+ messages in thread
From: Chris Wilson @ 2018-08-01 15:10 UTC (permalink / raw)
  To: Katarzyna Dec; +Cc: igt-dev

Quoting Katarzyna Dec (2018-08-01 15:47:39)
> On Wed, Jul 25, 2018 at 10:38:23PM +0100, Chris Wilson wrote:
> > +static unsigned int
> > +get_tiling_stride(const struct device *device,
> > +               unsigned int width, unsigned int tiling)
> > +{
> > +     unsigned int stride = 4u * width;
> > +
> > +     if (tiling) {
> > +             if (device->gen < 3)
> > +                     stride = ALIGN(stride, 128);
> > +             else if (device->gen < 4 || tiling == I915_TILING_X)
> > +                     stride = ALIGN(stride, 512);
> > +             else
> > +                     stride = ALIGN(stride, 128);
> > +             if (device->gen < 4)
> > +                     stride = 1 << fls(stride - 1);
> Shouldn't 'else' be last here ^? What about order of 'else if' and 'if'
> at the end?

First align to the stride, then round up to a pot for old gen. It can be
done either away around; the rule is just it has to be both a multiple
of tile_width and a power of two.

> > +     } else {
> > +             if (device->gen >= 8)
> > +                     stride = ALIGN(stride, 64);

This is one that is more dubious as it papering over a hw bump that is a
bit more subtle. After all part of the reason for this test is to detect
errors like that.

> > +     if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> > +             unsigned int mask;
> > +
> > +             batch[i++] = MI_LOAD_REGISTER_IMM;
> > +             batch[i++] = BCS_SWCTRL;
> > +
> 
> All batch creation staring from here ^ looks misterious. Maybe we can make
> buffer_set_tiling and buffer_linear functions more clearer?
> It is hard do read and understand.

About the only thing I might touch is the lri. But I am not that
convinced it's worth it in the grand scheme.

> > +static void *download(const struct device *device,
> > +                   const struct buffer *buffer,
> > +                   enum mode mode)
> > +{
> > +     void *linear, *src;
> > +
> > +     igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
> > +
> > +     switch (mode) {
> > +     case CPU:
> > +             if (buffer->tiling) {
> > +                     if (buffer->caching && !device->llc) {
> > +                             mode = GTT;
> > +                             break;
> > +                     }
> > +                     if (device->gen < 3) {
> > +                             mode = GTT;
> > +                             break;
> > +                     }
> Why do we need 2 if-s here ^? Both are setting the same mode.

Semantics, they are for different reasons, but related reasons.
Not allowed to copy between snooped surfaces; and gen2 doesn't allow
copy to snoop, just copy from snoop.

> > +     if (dst->caching) {
> > +             igt_assert(device->gen >= 3);
> > +             igt_assert(device->llc || !src->caching);
> > +     }
> > +
> How did you calculated this values above ^ ?

^^

> Whole test with all helper functions seems to be not clear enough. 
> I would prefer to have more comments of what is going on in the code,
> especially in creating batches.

There isn't much going on, no tricks, just straight forward XY_SRC_COPY.
 
> General question - does this test gives additional coverge or will be
> a substitue for some existing?

Both more and less at the moment. There's better testing for the copy
and access patterns, but less around the limits of the GTT. But atm we
only do the sanitychecks in CI, so from that perspective this is much
more complete testing of things like pixel/cacheline/page access between
tiling modes.

The challenge I have in mind is how to let it explore by itself. A small
bytecode to try out different combinations of state is one way, but that
only covers the primitives we have modelled and therein lies the rub.
How to go completely crazy yet still detect error (other than oopses).
-Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits
  2018-08-01 15:10   ` Chris Wilson
@ 2018-08-02 11:05     ` Katarzyna Dec
  0 siblings, 0 replies; 6+ messages in thread
From: Katarzyna Dec @ 2018-08-02 11:05 UTC (permalink / raw)
  To: Chris Wilson; +Cc: igt-dev

On Wed, Aug 01, 2018 at 04:10:07PM +0100, Chris Wilson wrote:
> Quoting Katarzyna Dec (2018-08-01 15:47:39)
> > On Wed, Jul 25, 2018 at 10:38:23PM +0100, Chris Wilson wrote:
> > > +static unsigned int
> > > +get_tiling_stride(const struct device *device,
> > > +               unsigned int width, unsigned int tiling)
> > > +{
> > > +     unsigned int stride = 4u * width;
> > > +
> > > +     if (tiling) {
                    /* align to the stride */
> > > +             if (device->gen < 3)
> > > +                     stride = ALIGN(stride, 128);
> > > +             else if (device->gen < 4 || tiling == I915_TILING_X)
> > > +                     stride = ALIGN(stride, 512);
> > > +             else
> > > +                     stride = ALIGN(stride, 128);
                    /* round up to a pot for old gen */
> > > +             if (device->gen < 4)
> > > +                     stride = 1 << fls(stride - 1);
> > Shouldn't 'else' be last here ^? What about order of 'else if' and 'if'
> > at the end?
> 
> First align to the stride, then round up to a pot for old gen. It can be
> done either away around; the rule is just it has to be both a multiple
> of tile_width and a power of two.
>
Now I understand. Maybe it would be worth to add some comment about that
in the code? It would be much more clear for everybody.
> > > +     } else {
> > > +             if (device->gen >= 8)
> > > +                     stride = ALIGN(stride, 64);
> 
> This is one that is more dubious as it papering over a hw bump that is a
> bit more subtle. After all part of the reason for this test is to detect
> errors like that.
> 
> > > +     if ((tiling | buffer->tiling) >= I915_TILING_Y) {
> > > +             unsigned int mask;
> > > +
> > > +             batch[i++] = MI_LOAD_REGISTER_IMM;
> > > +             batch[i++] = BCS_SWCTRL;
> > > +
> > 
> > All batch creation staring from here ^ looks misterious. Maybe we can make
> > buffer_set_tiling and buffer_linear functions more clearer?
> > It is hard do read and understand.
> 
> About the only thing I might touch is the lri. But I am not that
> convinced it's worth it in the grand scheme.
>
Some documentation is better then no documentation. I know that code should be
it's own judge, but you need to have deep understanding in driver code to get
what is going on with creating these batches.
And by docs I do not mean every super formatted detail, just small comments
inline comments.

> > > +static void *download(const struct device *device,
> > > +                   const struct buffer *buffer,
> > > +                   enum mode mode)
> > > +{
> > > +     void *linear, *src;
> > > +
> > > +     igt_assert(posix_memalign(&linear, 4096, buffer->size) == 0);
> > > +
> > > +     switch (mode) {
> > > +     case CPU:
> > > +             if (buffer->tiling) {
> > > +                     if (buffer->caching && !device->llc) {
> > > +                             mode = GTT;
> > > +                             break;
> > > +                     }
> > > +                     if (device->gen < 3) {
> > > +                             mode = GTT;
> > > +                             break;
> > > +                     }
> > Why do we need 2 if-s here ^? Both are setting the same mode.
> 
> Semantics, they are for different reasons, but related reasons.
> Not allowed to copy between snooped surfaces; and gen2 doesn't allow
> copy to snoop, just copy from snoop.
Same as above, it would be nice to heve even one line comment here.
> 
> > > +     if (dst->caching) {
> > > +             igt_assert(device->gen >= 3);
> > > +             igt_assert(device->llc || !src->caching);
> > > +     }
> > > +
> > How did you calculated this values above ^ ?
> 
> ^^
> 
> > Whole test with all helper functions seems to be not clear enough. 
> > I would prefer to have more comments of what is going on in the code,
> > especially in creating batches.
> 
> There isn't much going on, no tricks, just straight forward XY_SRC_COPY.
>
It would be nice if you introduce some one-line comments in few places. Maybe
code is simple, but it doesn't look like that.

Kasia :)
> > General question - does this test gives additional coverge or will be
> > a substitue for some existing?
> 
> Both more and less at the moment. There's better testing for the copy
> and access patterns, but less around the limits of the GTT. But atm we
> only do the sanitychecks in CI, so from that perspective this is much
> more complete testing of things like pixel/cacheline/page access between
> tiling modes.
> 
> The challenge I have in mind is how to let it explore by itself. A small
> bytecode to try out different combinations of state is one way, but that
> only covers the primitives we have modelled and therein lies the rub.
> How to go completely crazy yet still detect error (other than oopses).
> -Chris
_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-08-02 11:05 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-07-25 21:38 [igt-dev] [PATCH i-g-t] igt: Another combinatorial exercise for blits Chris Wilson
2018-07-26  0:19 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork
2018-07-26  3:09 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork
2018-08-01 14:47 ` [igt-dev] [PATCH i-g-t] " Katarzyna Dec
2018-08-01 15:10   ` Chris Wilson
2018-08-02 11:05     ` Katarzyna Dec

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).