* [igt-dev] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle @ 2018-06-28 21:35 ` Chris Wilson 0 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-28 21:35 UTC (permalink / raw) To: intel-gfx; +Cc: igt-dev The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT pressure (by trying to execute more objects than may fit into the GTT). We spread the same set of handles across different processes, with the result that each would occasionally stall waiting for execution of an unrelated batch, limiting the pressure we were applying. If we use a streaming write via a WC pointer, we can avoid the serialisation penalty and so submit faster. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c index 4097e4077..efd612bb6 100644 --- a/tests/gem_exec_gttfill.c +++ b/tests/gem_exec_gttfill.c @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); #define BATCH_SIZE (4096<<10) -static void xchg_u32(void *array, unsigned i, unsigned j) +struct batch { + uint32_t handle; + void *ptr; +}; + +static void xchg_batch(void *array, unsigned int i, unsigned int j) { - uint32_t *u32 = array; - uint32_t tmp = u32[i]; - u32[i] = u32[j]; - u32[j] = tmp; + struct batch *batches = array; + struct batch tmp; + + tmp = batches[i]; + batches[i] = batches[j]; + batches[j] = tmp; } static void submit(int fd, int gen, struct drm_i915_gem_execbuffer2 *eb, struct drm_i915_gem_relocation_entry *reloc, - uint32_t *handles, unsigned count) + struct batch *batches, unsigned int count) { struct drm_i915_gem_exec_object2 obj; uint32_t batch[16]; @@ -80,7 +87,7 @@ static void submit(int fd, int gen, eb->buffers_ptr = to_user_pointer(&obj); for (unsigned i = 0; i < count; i++) { - obj.handle = handles[i]; + obj.handle = batches[i].handle; 
reloc[0].target_handle = obj.handle; reloc[1].target_handle = obj.handle; @@ -88,8 +95,8 @@ static void submit(int fd, int gen, reloc[0].presumed_offset = obj.offset; reloc[1].presumed_offset = obj.offset; - gem_write(fd, obj.handle, eb->batch_start_offset, - batch, sizeof(batch)); + memcpy(batches[i].ptr + eb->batch_start_offset, + batch, sizeof(batch)); gem_execbuf(fd, eb); } @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_relocation_entry reloc[2]; volatile uint64_t *shared; - unsigned *handles; + struct batch *batches; unsigned engines[16]; unsigned nengine; unsigned engine; @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; - handles = calloc(count, sizeof(handles)); - igt_assert(handles); - for (unsigned i = 0; i < count; i++) - handles[i] = gem_create(fd, BATCH_SIZE); + batches = calloc(count, sizeof(*batches)); + igt_assert(batches); + for (unsigned i = 0; i < count; i++) { + batches[i].handle = gem_create(fd, BATCH_SIZE); + batches[i].ptr = + __gem_mmap__wc(fd, batches[i].handle, + 0, BATCH_SIZE, PROT_WRITE); + if (!batches[i].ptr) { + batches[i].ptr = + __gem_mmap__gtt(fd, batches[i].handle, + BATCH_SIZE, PROT_WRITE); + } + igt_require(batches[i].ptr); + } /* Flush all memory before we start the timer */ - submit(fd, gen, &execbuf, reloc, handles, count); + submit(fd, gen, &execbuf, reloc, batches, count); igt_fork(child, nengine) { uint64_t cycles = 0; hars_petruska_f54_1_random_perturb(child); - igt_permute_array(handles, count, xchg_u32); + igt_permute_array(batches, count, xchg_batch); execbuf.batch_start_offset = child*64; execbuf.flags |= engines[child]; igt_until_timeout(timeout) { - submit(fd, gen, &execbuf, reloc, handles, count); + submit(fd, gen, &execbuf, reloc, batches, count); for (unsigned i = 0; i < count; i++) { - uint32_t handle = handles[i]; - uint64_t buf[2]; + uint64_t 
offset, delta; - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0])); - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); - igt_assert_eq_u64(buf[0], buf[1]); + offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset); + delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta); + igt_assert_eq_u64(offset, delta); } cycles++; } @@ -176,8 +192,10 @@ static void fillgtt(int fd, unsigned ring, int timeout) } igt_waitchildren(); - for (unsigned i = 0; i < count; i++) - gem_close(fd, handles[i]); + for (unsigned i = 0; i < count; i++) { + munmap(batches[i].ptr, BATCH_SIZE); + gem_close(fd, batches[i].handle); + } shared[nengine] = 0; for (unsigned i = 0; i < nengine; i++) -- 2.18.0 _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle @ 2018-06-28 21:35 ` Chris Wilson 0 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-28 21:35 UTC (permalink / raw) To: intel-gfx; +Cc: igt-dev The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT pressure (by trying to execute more objects than may fit into the GTT). We spread the same set of handles across different processes, with the result that each would occasionally stall waiting for execution of an unrelated batch, limiting the pressure we were applying. If we use a streaming write via a WC pointer, we can avoid the serialisation penalty and so submit faster. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c index 4097e4077..efd612bb6 100644 --- a/tests/gem_exec_gttfill.c +++ b/tests/gem_exec_gttfill.c @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); #define BATCH_SIZE (4096<<10) -static void xchg_u32(void *array, unsigned i, unsigned j) +struct batch { + uint32_t handle; + void *ptr; +}; + +static void xchg_batch(void *array, unsigned int i, unsigned int j) { - uint32_t *u32 = array; - uint32_t tmp = u32[i]; - u32[i] = u32[j]; - u32[j] = tmp; + struct batch *batches = array; + struct batch tmp; + + tmp = batches[i]; + batches[i] = batches[j]; + batches[j] = tmp; } static void submit(int fd, int gen, struct drm_i915_gem_execbuffer2 *eb, struct drm_i915_gem_relocation_entry *reloc, - uint32_t *handles, unsigned count) + struct batch *batches, unsigned int count) { struct drm_i915_gem_exec_object2 obj; uint32_t batch[16]; @@ -80,7 +87,7 @@ static void submit(int fd, int gen, eb->buffers_ptr = to_user_pointer(&obj); for (unsigned i = 0; i < count; i++) { - obj.handle = handles[i]; + obj.handle = batches[i].handle; reloc[0].target_handle = 
obj.handle; reloc[1].target_handle = obj.handle; @@ -88,8 +95,8 @@ static void submit(int fd, int gen, reloc[0].presumed_offset = obj.offset; reloc[1].presumed_offset = obj.offset; - gem_write(fd, obj.handle, eb->batch_start_offset, - batch, sizeof(batch)); + memcpy(batches[i].ptr + eb->batch_start_offset, + batch, sizeof(batch)); gem_execbuf(fd, eb); } @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_relocation_entry reloc[2]; volatile uint64_t *shared; - unsigned *handles; + struct batch *batches; unsigned engines[16]; unsigned nengine; unsigned engine; @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) if (gen < 6) execbuf.flags |= I915_EXEC_SECURE; - handles = calloc(count, sizeof(handles)); - igt_assert(handles); - for (unsigned i = 0; i < count; i++) - handles[i] = gem_create(fd, BATCH_SIZE); + batches = calloc(count, sizeof(*batches)); + igt_assert(batches); + for (unsigned i = 0; i < count; i++) { + batches[i].handle = gem_create(fd, BATCH_SIZE); + batches[i].ptr = + __gem_mmap__wc(fd, batches[i].handle, + 0, BATCH_SIZE, PROT_WRITE); + if (!batches[i].ptr) { + batches[i].ptr = + __gem_mmap__gtt(fd, batches[i].handle, + BATCH_SIZE, PROT_WRITE); + } + igt_require(batches[i].ptr); + } /* Flush all memory before we start the timer */ - submit(fd, gen, &execbuf, reloc, handles, count); + submit(fd, gen, &execbuf, reloc, batches, count); igt_fork(child, nengine) { uint64_t cycles = 0; hars_petruska_f54_1_random_perturb(child); - igt_permute_array(handles, count, xchg_u32); + igt_permute_array(batches, count, xchg_batch); execbuf.batch_start_offset = child*64; execbuf.flags |= engines[child]; igt_until_timeout(timeout) { - submit(fd, gen, &execbuf, reloc, handles, count); + submit(fd, gen, &execbuf, reloc, batches, count); for (unsigned i = 0; i < count; i++) { - uint32_t handle = handles[i]; - uint64_t buf[2]; + uint64_t offset, delta; - gem_read(fd, 
handle, reloc[1].offset, &buf[0], sizeof(buf[0])); - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); - igt_assert_eq_u64(buf[0], buf[1]); + offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset); + delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta); + igt_assert_eq_u64(offset, delta); } cycles++; } @@ -176,8 +192,10 @@ static void fillgtt(int fd, unsigned ring, int timeout) } igt_waitchildren(); - for (unsigned i = 0; i < count; i++) - gem_close(fd, handles[i]); + for (unsigned i = 0; i < count; i++) { + munmap(batches[i].ptr, BATCH_SIZE); + gem_close(fd, batches[i].handle); + } shared[nengine] = 0; for (unsigned i = 0; i < nengine; i++) -- 2.18.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 18+ messages in thread
* [igt-dev] ✓ Fi.CI.BAT: success for igt/gem_exec_gttfill: Avoid pwrite into busy handle 2018-06-28 21:35 ` Chris Wilson (?) @ 2018-06-29 0:21 ` Patchwork -1 siblings, 0 replies; 18+ messages in thread From: Patchwork @ 2018-06-29 0:21 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_exec_gttfill: Avoid pwrite into busy handle URL : https://patchwork.freedesktop.org/series/45620/ State : success == Summary == = CI Bug Log - changes from CI_DRM_4401 -> IGTPW_1511 = == Summary - SUCCESS == No regressions found. External URL: https://patchwork.freedesktop.org/api/1.0/series/45620/revisions/1/mbox/ == Known issues == Here are the changes found in IGTPW_1511 that come from known issues: === IGT changes === ==== Issues hit ==== igt@kms_chamelium@hdmi-hpd-fast: fi-kbl-7500u: SKIP -> FAIL (fdo#102672, fdo#103841) fdo#102672 https://bugs.freedesktop.org/show_bug.cgi?id=102672 fdo#103841 https://bugs.freedesktop.org/show_bug.cgi?id=103841 == Participating hosts (43 -> 39) == Missing (4): fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u == Build changes == * IGT: IGT_4530 -> IGTPW_1511 CI_DRM_4401: 4fe59a304a9a855a1c0e9a576c94d4cca239b427 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_1511: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1511/ IGT_4530: 0e98bf69f146eb72fe3a7c3b19a049b5786f0ca3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1511/issues.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* [igt-dev] ✓ Fi.CI.IGT: success for igt/gem_exec_gttfill: Avoid pwrite into busy handle 2018-06-28 21:35 ` Chris Wilson (?) (?) @ 2018-06-29 7:17 ` Patchwork -1 siblings, 0 replies; 18+ messages in thread From: Patchwork @ 2018-06-29 7:17 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_exec_gttfill: Avoid pwrite into busy handle URL : https://patchwork.freedesktop.org/series/45620/ State : success == Summary == = CI Bug Log - changes from IGT_4530_full -> IGTPW_1511_full = == Summary - WARNING == Minor unknown changes coming with IGTPW_1511_full need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in IGTPW_1511_full, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. External URL: https://patchwork.freedesktop.org/api/1.0/series/45620/revisions/1/mbox/ == Possible new issues == Here are the unknown changes that may have been introduced in IGTPW_1511_full: === IGT changes === ==== Warnings ==== igt@gem_linear_blits@interruptible: shard-glk: SKIP -> PASS shard-apl: SKIP -> PASS igt@perf_pmu@rc6: shard-kbl: SKIP -> PASS +1 == Known issues == Here are the changes found in IGTPW_1511_full that come from known issues: === IGT changes === ==== Issues hit ==== igt@drv_suspend@shrink: shard-snb: PASS -> FAIL (fdo#106886) igt@kms_cursor_legacy@cursor-vs-flip-varying-size: shard-hsw: PASS -> FAIL (fdo#103355) igt@kms_flip@plain-flip-ts-check: shard-glk: PASS -> FAIL (fdo#100368) igt@kms_flip_tiling@flip-to-y-tiled: shard-glk: PASS -> FAIL (fdo#104724, fdo#103822) igt@kms_rotation_crc@sprite-rotation-180: shard-snb: PASS -> FAIL (fdo#104724, fdo#103925) igt@perf_pmu@rc6-runtime-pm: shard-snb: SKIP -> INCOMPLETE (fdo#105411) ==== Possible fixes ==== igt@drv_selftest@live_gtt: shard-glk: FAIL (fdo#105347) -> PASS igt@gem_ctx_isolation@rcs0-s3: shard-kbl: INCOMPLETE (fdo#103665) -> PASS 
igt@kms_cursor_legacy@cursora-vs-flipa-toggle: shard-glk: DMESG-WARN (fdo#105763) -> PASS igt@kms_flip@2x-plain-flip-fb-recreate: shard-glk: FAIL (fdo#100368) -> PASS igt@kms_flip@flip-vs-expired-vblank: shard-hsw: FAIL (fdo#105363, fdo#102887) -> PASS igt@kms_flip_tiling@flip-to-x-tiled: shard-glk: FAIL (fdo#104724, fdo#103822) -> PASS igt@kms_flip_tiling@flip-x-tiled: shard-glk: FAIL (fdo#104724) -> PASS igt@perf_pmu@busy-accuracy-98-vcs1: shard-snb: INCOMPLETE (fdo#105411) -> SKIP igt@testdisplay: shard-glk: INCOMPLETE (k.org#198133, fdo#103359) -> PASS fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368 fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887 fdo#103355 https://bugs.freedesktop.org/show_bug.cgi?id=103355 fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359 fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665 fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822 fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925 fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724 fdo#105347 https://bugs.freedesktop.org/show_bug.cgi?id=105347 fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363 fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411 fdo#105763 https://bugs.freedesktop.org/show_bug.cgi?id=105763 fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886 k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133 == Participating hosts (5 -> 5) == No changes in participating hosts == Build changes == * IGT: IGT_4530 -> IGTPW_1511 * Linux: CI_DRM_4373 -> CI_DRM_4401 CI_DRM_4373: be7193758db79443ad5dc45072a166746819ba7e @ git://anongit.freedesktop.org/gfx-ci/linux CI_DRM_4401: 4fe59a304a9a855a1c0e9a576c94d4cca239b427 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_1511: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1511/ IGT_4530: 0e98bf69f146eb72fe3a7c3b19a049b5786f0ca3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Logs == For more details 
see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1511/shards.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* [igt-dev] [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings 2018-06-28 21:35 ` Chris Wilson @ 2018-06-29 7:44 ` Chris Wilson -1 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-29 7:44 UTC (permalink / raw) To: intel-gfx; +Cc: igt-dev, Tvrtko Ursulin Setup a userptr object that only has a read-only mapping back to a file store (memfd). Then attempt to write into that mapping using the GPU and assert that those writes do not land (while also writing via a writable userptr mapping into the same memfd to verify that the GPU is working!) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- Minor commentary additions --- configure.ac | 1 + lib/ioctl_wrappers.c | 4 +- lib/ioctl_wrappers.h | 4 +- lib/meson.build | 1 + meson.build | 1 + tests/Makefile.am | 4 +- tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++- 7 files changed, 377 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index 1ee4e90e9..195963d4f 100644 --- a/configure.ac +++ b/configure.ac @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) PKG_CHECK_MODULES(KMOD, [libkmod]) PKG_CHECK_MODULES(PROCPS, [libprocps]) PKG_CHECK_MODULES(LIBUNWIND, [libunwind]) +PKG_CHECK_MODULES(SSL, [openssl]) PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no]) if test x$have_valgrind = xyes; then diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c index 79db44a8c..d5d2a4e4c 100644 --- a/lib/ioctl_wrappers.c +++ b/lib/ioctl_wrappers.c @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state) return madv.retained; } -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) { struct drm_i915_gem_userptr userptr; @@ -898,7 +898,7 @@ int 
__gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui * * Returns userptr handle for the GEM object. */ -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) { igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0); } diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h index b966f72c9..8e2cd380b 100644 --- a/lib/ioctl_wrappers.h +++ b/lib/ioctl_wrappers.h @@ -133,8 +133,8 @@ struct local_i915_gem_userptr { #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31) uint32_t handle; }; -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); void gem_sw_finish(int fd, uint32_t handle); diff --git a/lib/meson.build b/lib/meson.build index 1a355414e..939167f91 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -62,6 +62,7 @@ lib_deps = [ pthreads, math, realtime, + ssl, ] if libdrm_intel.found() diff --git a/meson.build b/meson.build index 4d15d6238..638c01066 100644 --- a/meson.build +++ b/meson.build @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10') libkmod = dependency('libkmod') libprocps = dependency('libprocps', required : true) libunwind = dependency('libunwind', required : true) +ssl = dependency('openssl', required : true) valgrind = null_dep valgrindinfo = 'No' diff --git a/tests/Makefile.am b/tests/Makefile.am index f41ad5096..ba307b220 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_tiled_swapping_LDADD = $(LDADD) -lpthread 
prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) prime_self_import_LDADD = $(LDADD) -lpthread -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) -gem_userptr_blits_LDADD = $(LDADD) -lpthread +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS) +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la gem_eio_LDADD = $(LDADD) -lrt diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c index 7e3b6ef38..0fb7eba04 100644 --- a/tests/gem_userptr_blits.c +++ b/tests/gem_userptr_blits.c @@ -43,13 +43,17 @@ #include <fcntl.h> #include <inttypes.h> #include <errno.h> +#include <setjmp.h> #include <sys/stat.h> #include <sys/time.h> #include <sys/mman.h> +#include <openssl/sha.h> #include <signal.h> #include <pthread.h> #include <time.h> +#include <linux/memfd.h> + #include "drm.h" #include "i915_drm.h" @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo) return ret; } +static void store_dword(int fd, uint32_t target, + uint32_t offset, uint32_t value) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + struct drm_i915_gem_exec_object2 obj[2]; + struct drm_i915_gem_relocation_entry reloc; + struct drm_i915_gem_execbuffer2 execbuf; + uint32_t batch[16]; + int i; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(obj); + execbuf.buffer_count = ARRAY_SIZE(obj); + execbuf.flags = 0; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(obj, 0, sizeof(obj)); + obj[0].handle = target; + obj[1].handle = gem_create(fd, 4096); + + memset(&reloc, 0, sizeof(reloc)); + reloc.target_handle = obj[0].handle; + reloc.presumed_offset = 0; + reloc.offset = sizeof(uint32_t); + reloc.delta = offset; + reloc.read_domains = I915_GEM_DOMAIN_RENDER; + reloc.write_domain = I915_GEM_DOMAIN_RENDER; + obj[1].relocs_ptr = to_user_pointer(&reloc); + obj[1].relocation_count = 1; + + i = 0; + batch[i] = 
MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + batch[++i] = offset; + batch[++i] = 0; + } else if (gen >= 4) { + batch[++i] = 0; + batch[++i] = offset; + reloc.offset += sizeof(uint32_t); + } else { + batch[i]--; + batch[++i] = offset; + } + batch[++i] = value; + batch[++i] = MI_BATCH_BUFFER_END; + gem_write(fd, obj[1].handle, 0, batch, sizeof(batch)); + gem_execbuf(fd, &execbuf); + gem_close(fd, obj[1].handle); +} + static uint32_t create_userptr(int fd, uint32_t val, uint32_t *ptr) { @@ -941,6 +996,310 @@ static int test_dmabuf(void) return 0; } +static void test_readonly(int i915) +{ + unsigned char orig[SHA_DIGEST_LENGTH]; + uint64_t aperture_size; + uint32_t whandle, rhandle; + size_t sz, total; + void *pages, *space; + int memfd; + + /* + * A small batch of pages; small enough to cheaply check for stray + * writes but large enough that we don't create too many VMA pointing + * back to this set from the large arena. The limit on total number + * of VMA for a process is 65,536 (at least on this kernel). + * + * We then write from the GPU through the large arena into the smaller + * backing storage, which we can cheaply check to see if those writes + * have landed (using a SHA1sum). Repeating the same random GPU writes + * through a read-only handle to confirm that this time the writes are + * discarded and the backing store unchanged. 
+ */ + sz = 16 << 12; + memfd = memfd_create("pages", 0); + igt_require(memfd != -1); + igt_require(ftruncate(memfd, sz) == 0); + + pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0); + igt_assert(pages != MAP_FAILED); + + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0); + gem_close(i915, rhandle); + + gem_userptr(i915, pages, sz, false, userptr_flags, &whandle); + + total = 2048ull << 20; + aperture_size = gem_aperture_size(i915) / 2; + if (aperture_size < total) + total = aperture_size; + total = total / sz * sz; + igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n", + total, total >> 12, sz >> 12); + + /* Create an arena all pointing to the same set of pages */ + space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0); + igt_require(space != MAP_FAILED); + for (size_t offset = 0; offset < total; offset += sz) { + igt_assert(mmap(space + offset, sz, + PROT_WRITE, MAP_SHARED | MAP_FIXED, + memfd, 0) != MAP_FAILED); + *(uint32_t *)(space + offset) = offset; + } + igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz)); + igt_assert(mlock(space, total) == 0); + close(memfd); + + /* Check we can create a normal userptr bo wrapping the wrapper */ + gem_userptr(i915, space, total, false, userptr_flags, &rhandle); + gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0); + for (size_t offset = 0; offset < total; offset += sz) + store_dword(i915, rhandle, offset + 4, offset / sz); + gem_sync(i915, rhandle); + igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz)); + igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1)); + gem_close(i915, rhandle); + + /* Now enforce read-only henceforth */ + igt_assert(mprotect(space, total, PROT_READ) == 0); + + SHA1(pages, sz, orig); + igt_fork(child, 1) { + const int gen = intel_gen(intel_get_drm_devid(i915)); + const int nreloc = 1024; + struct drm_i915_gem_relocation_entry *reloc; + struct drm_i915_gem_exec_object2 obj[2]; + struct 
drm_i915_gem_execbuffer2 exec; + unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH]; + uint32_t *batch; + int i; + + reloc = calloc(sizeof(*reloc), nreloc); + gem_userptr(i915, space, total, true, userptr_flags, &rhandle); + + memset(obj, 0, sizeof(obj)); + obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B; + obj[1].handle = gem_create(i915, sz); + obj[1].relocation_count = nreloc; + obj[1].relocs_ptr = to_user_pointer(reloc); + + batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE); + + memset(&exec, 0, sizeof(exec)); + exec.buffer_count = 2; + exec.buffers_ptr = to_user_pointer(obj); + if (gen < 6) + exec.flags |= I915_EXEC_SECURE; + + for_each_engine(i915, exec.flags) { + /* First tweak the backing store through the write */ + i = 0; + obj[0].handle = whandle; + for (int n = 0; n < nreloc; n++) { + uint64_t offset; + + reloc[n].target_handle = obj[0].handle; + reloc[n].delta = rand() % (sz / 4) * 4; + reloc[n].offset = (i + 1) * sizeof(uint32_t); + reloc[n].presumed_offset = obj[0].offset; + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; + + offset = reloc[n].presumed_offset + reloc[n].delta; + + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + batch[++i] = offset; + batch[++i] = offset >> 32; + } else if (gen >= 4) { + batch[++i] = 0; + batch[++i] = offset; + reloc[n].offset += sizeof(uint32_t); + } else { + batch[i]--; + batch[++i] = offset; + } + batch[++i] = rand(); + i++; + } + batch[i] = MI_BATCH_BUFFER_END; + igt_assert(i * sizeof(uint32_t) < sz); + + gem_execbuf(i915, &exec); + gem_sync(i915, obj[0].handle); + SHA1(pages, sz, ref); + + igt_assert(memcmp(ref, orig, sizeof(ref))); + memcpy(orig, ref, sizeof(orig)); + + /* Now try the same through the read-only handle */ + i = 0; + obj[0].handle = rhandle; + for (int n = 0; n < nreloc; n++) { + uint64_t offset; + + reloc[n].target_handle = obj[0].handle; + reloc[n].delta = rand() % (total / 4) * 4; + reloc[n].offset = (i + 1) * sizeof(uint32_t); + reloc[n].presumed_offset = obj[0].offset; + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; + + offset = reloc[n].presumed_offset + reloc[n].delta; + + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + batch[++i] = offset; + batch[++i] = offset >> 32; + } else if (gen >= 4) { + batch[++i] = 0; + batch[++i] = offset; + reloc[n].offset += sizeof(uint32_t); + } else { + batch[i]--; + batch[++i] = offset; + } + batch[++i] = rand(); + i++; + } + batch[i] = MI_BATCH_BUFFER_END; + + gem_execbuf(i915, &exec); + gem_sync(i915, obj[0].handle); + SHA1(pages, sz, result); + + /* + * As the writes into the read-only GPU bo should fail, + * the SHA1 hash of the backing store should be + * unaffected. 
+ */ + igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0); + } + + munmap(batch, sz); + gem_close(i915, obj[1].handle); + gem_close(i915, rhandle); + } + igt_waitchildren(); + + munmap(space, total); + munmap(pages, sz); +} + +static jmp_buf sigjmp; +static void sigjmp_handler(int sig) +{ + siglongjmp(sigjmp, sig); +} + +static void test_readonly_mmap(int i915) +{ + unsigned char original[SHA_DIGEST_LENGTH]; + unsigned char result[SHA_DIGEST_LENGTH]; + uint32_t handle; + uint32_t sz; + void *pages; + void *ptr; + int sig; + + /* + * A quick check to ensure that we cannot circumvent the + * read-only nature of our memory by creating a GTT mmap into + * the pages. Imagine receiving a readonly SHM segment from + * another process, or a readonly file mmap, it must remain readonly + * on the GPU as well. + */ + + igt_require(igt_setup_clflush()); + + sz = 16 << 12; + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + igt_assert(pages != MAP_FAILED); + + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); + gem_set_caching(i915, handle, 0); + + memset(pages, 0xa5, sz); + igt_clflush_range(pages, sz); + SHA1(pages, sz, original); + + ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE); + igt_assert(ptr == NULL); + + ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ); + gem_close(i915, handle); + + /* Check that a write into the GTT readonly map fails */ + if (!(sig = sigsetjmp(sigjmp, 1))) { + signal(SIGBUS, sigjmp_handler); + signal(SIGSEGV, sigjmp_handler); + memset(ptr, 0x5a, sz); + igt_assert(0); + } + igt_assert_eq(sig, SIGSEGV); + + /* Check that we disallow removing the readonly protection */ + igt_assert(mprotect(ptr, sz, PROT_WRITE)); + if (!(sig = sigsetjmp(sigjmp, 1))) { + signal(SIGBUS, sigjmp_handler); + signal(SIGSEGV, sigjmp_handler); + memset(ptr, 0x5a, sz); + igt_assert(0); + } + igt_assert_eq(sig, SIGSEGV); + + /* A single read from the GTT pointer to prove that works */ + igt_assert_eq_u32(*(uint8_t 
*)ptr, 0xa5); + munmap(ptr, sz); + + /* Double check that the kernel did indeed not let any writes through */ + igt_clflush_range(pages, sz); + SHA1(pages, sz, result); + igt_assert(!memcmp(original, result, sizeof(original))); + + munmap(pages, sz); +} + +static void test_readonly_pwrite(int i915) +{ + unsigned char original[SHA_DIGEST_LENGTH]; + unsigned char result[SHA_DIGEST_LENGTH]; + uint32_t handle; + uint32_t sz; + void *pages; + + /* + * Same as for GTT mmappings, we cannot allow ourselves to + * circumvent readonly protection on a piece of memory via the + * pwrite ioctl. + */ + + igt_require(igt_setup_clflush()); + + sz = 16 << 12; + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + igt_assert(pages != MAP_FAILED); + + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); + memset(pages, 0xa5, sz); + SHA1(pages, sz, original); + + for (int page = 0; page < 16; page++) { + char data[4096]; + + memset(data, page, sizeof(data)); + igt_assert_eq(__gem_write(i915, handle, page << 12, data, sizeof(data)), -EINVAL); + } + + gem_close(i915, handle); + + SHA1(pages, sz, result); + igt_assert(!memcmp(original, result, sizeof(original))); + + munmap(pages, sz); +} + static int test_usage_restrictions(int fd) { void *ptr; @@ -961,10 +1320,6 @@ static int test_usage_restrictions(int fd) ret = __gem_userptr(fd, (char *)ptr + 1, PAGE_SIZE - 1, 0, userptr_flags, &handle); igt_assert_neq(ret, 0); - /* Read-only not supported. 
*/ - ret = __gem_userptr(fd, (char *)ptr, PAGE_SIZE, 1, userptr_flags, &handle); - igt_assert_neq(ret, 0); - free(ptr); return 0; @@ -1502,6 +1857,15 @@ int main(int argc, char **argv) igt_subtest("dmabuf-unsync") test_dmabuf(); + igt_subtest("readonly-unsync") + test_readonly(fd); + + igt_subtest("readonly-mmap-unsync") + test_readonly_mmap(fd); + + igt_subtest("readonly-pwrite-unsync") + test_readonly_pwrite(fd); + for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; flags++) { igt_subtest_f("forked-unsync%s%s%s-%s", flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" : "", -- 2.18.0 _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings @ 2018-06-29 7:44 ` Chris Wilson 0 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-29 7:44 UTC (permalink / raw) To: intel-gfx; +Cc: igt-dev Setup a userptr object that only has a read-only mapping back to a file store (memfd). Then attempt to write into that mapping using the GPU and assert that those writes do not land (while also writing via a writable userptr mapping into the same memfd to verify that the GPU is working!) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> --- Minor commentary additions --- configure.ac | 1 + lib/ioctl_wrappers.c | 4 +- lib/ioctl_wrappers.h | 4 +- lib/meson.build | 1 + meson.build | 1 + tests/Makefile.am | 4 +- tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++- 7 files changed, 377 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index 1ee4e90e9..195963d4f 100644 --- a/configure.ac +++ b/configure.ac @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) PKG_CHECK_MODULES(KMOD, [libkmod]) PKG_CHECK_MODULES(PROCPS, [libprocps]) PKG_CHECK_MODULES(LIBUNWIND, [libunwind]) +PKG_CHECK_MODULES(SSL, [openssl]) PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no]) if test x$have_valgrind = xyes; then diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c index 79db44a8c..d5d2a4e4c 100644 --- a/lib/ioctl_wrappers.c +++ b/lib/ioctl_wrappers.c @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state) return madv.retained; } -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) { struct drm_i915_gem_userptr userptr; @@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, 
ui * * Returns userptr handle for the GEM object. */ -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) { igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0); } diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h index b966f72c9..8e2cd380b 100644 --- a/lib/ioctl_wrappers.h +++ b/lib/ioctl_wrappers.h @@ -133,8 +133,8 @@ struct local_i915_gem_userptr { #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31) uint32_t handle; }; -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); void gem_sw_finish(int fd, uint32_t handle); diff --git a/lib/meson.build b/lib/meson.build index 1a355414e..939167f91 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -62,6 +62,7 @@ lib_deps = [ pthreads, math, realtime, + ssl, ] if libdrm_intel.found() diff --git a/meson.build b/meson.build index 4d15d6238..638c01066 100644 --- a/meson.build +++ b/meson.build @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10') libkmod = dependency('libkmod') libprocps = dependency('libprocps', required : true) libunwind = dependency('libunwind', required : true) +ssl = dependency('openssl', required : true) valgrind = null_dep valgrindinfo = 'No' diff --git a/tests/Makefile.am b/tests/Makefile.am index f41ad5096..ba307b220 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) gem_tiled_swapping_LDADD = $(LDADD) -lpthread prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) prime_self_import_LDADD 
= $(LDADD) -lpthread -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) -gem_userptr_blits_LDADD = $(LDADD) -lpthread +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS) +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la gem_eio_LDADD = $(LDADD) -lrt diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c index 7e3b6ef38..0fb7eba04 100644 --- a/tests/gem_userptr_blits.c +++ b/tests/gem_userptr_blits.c @@ -43,13 +43,17 @@ #include <fcntl.h> #include <inttypes.h> #include <errno.h> +#include <setjmp.h> #include <sys/stat.h> #include <sys/time.h> #include <sys/mman.h> +#include <openssl/sha.h> #include <signal.h> #include <pthread.h> #include <time.h> +#include <linux/memfd.h> + #include "drm.h" #include "i915_drm.h" @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo) return ret; } +static void store_dword(int fd, uint32_t target, + uint32_t offset, uint32_t value) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + struct drm_i915_gem_exec_object2 obj[2]; + struct drm_i915_gem_relocation_entry reloc; + struct drm_i915_gem_execbuffer2 execbuf; + uint32_t batch[16]; + int i; + + memset(&execbuf, 0, sizeof(execbuf)); + execbuf.buffers_ptr = to_user_pointer(obj); + execbuf.buffer_count = ARRAY_SIZE(obj); + execbuf.flags = 0; + if (gen < 6) + execbuf.flags |= I915_EXEC_SECURE; + + memset(obj, 0, sizeof(obj)); + obj[0].handle = target; + obj[1].handle = gem_create(fd, 4096); + + memset(&reloc, 0, sizeof(reloc)); + reloc.target_handle = obj[0].handle; + reloc.presumed_offset = 0; + reloc.offset = sizeof(uint32_t); + reloc.delta = offset; + reloc.read_domains = I915_GEM_DOMAIN_RENDER; + reloc.write_domain = I915_GEM_DOMAIN_RENDER; + obj[1].relocs_ptr = to_user_pointer(&reloc); + obj[1].relocation_count = 1; + + i = 0; + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + batch[++i] = offset; + batch[++i] = 0; + } else if (gen >= 4) { + batch[++i] = 0; + batch[++i] = offset; + reloc.offset += sizeof(uint32_t); + } else { + batch[i]--; + batch[++i] = offset; + } + batch[++i] = value; + batch[++i] = MI_BATCH_BUFFER_END; + gem_write(fd, obj[1].handle, 0, batch, sizeof(batch)); + gem_execbuf(fd, &execbuf); + gem_close(fd, obj[1].handle); +} + static uint32_t create_userptr(int fd, uint32_t val, uint32_t *ptr) { @@ -941,6 +996,310 @@ static int test_dmabuf(void) return 0; } +static void test_readonly(int i915) +{ + unsigned char orig[SHA_DIGEST_LENGTH]; + uint64_t aperture_size; + uint32_t whandle, rhandle; + size_t sz, total; + void *pages, *space; + int memfd; + + /* + * A small batch of pages; small enough to cheaply check for stray + * writes but large enough that we don't create too many VMA pointing + * back to this set from the large arena. The limit on total number + * of VMA for a process is 65,536 (at least on this kernel). + * + * We then write from the GPU through the large arena into the smaller + * backing storage, which we can cheaply check to see if those writes + * have landed (using a SHA1sum). Repeating the same random GPU writes + * though a read-only handle to confirm that this time the writes are + * discarded and the backing store unchanged. 
+ */ + sz = 16 << 12; + memfd = memfd_create("pages", 0); + igt_require(memfd != -1); + igt_require(ftruncate(memfd, sz) == 0); + + pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0); + igt_assert(pages != MAP_FAILED); + + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0); + gem_close(i915, rhandle); + + gem_userptr(i915, pages, sz, false, userptr_flags, &whandle); + + total = 2048ull << 20; + aperture_size = gem_aperture_size(i915) / 2; + if (aperture_size < total) + total = aperture_size; + total = total / sz * sz; + igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n", + total, total >> 12, sz >> 12); + + /* Create an arena all pointing to the same set of pages */ + space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0); + igt_require(space != MAP_FAILED); + for (size_t offset = 0; offset < total; offset += sz) { + igt_assert(mmap(space + offset, sz, + PROT_WRITE, MAP_SHARED | MAP_FIXED, + memfd, 0) != MAP_FAILED); + *(uint32_t *)(space + offset) = offset; + } + igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz)); + igt_assert(mlock(space, total) == 0); + close(memfd); + + /* Check we can create a normal userptr bo wrapping the wrapper */ + gem_userptr(i915, space, total, false, userptr_flags, &rhandle); + gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0); + for (size_t offset = 0; offset < total; offset += sz) + store_dword(i915, rhandle, offset + 4, offset / sz); + gem_sync(i915, rhandle); + igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz)); + igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1)); + gem_close(i915, rhandle); + + /* Now enforce read-only henceforth */ + igt_assert(mprotect(space, total, PROT_READ) == 0); + + SHA1(pages, sz, orig); + igt_fork(child, 1) { + const int gen = intel_gen(intel_get_drm_devid(i915)); + const int nreloc = 1024; + struct drm_i915_gem_relocation_entry *reloc; + struct drm_i915_gem_exec_object2 obj[2]; + struct 
drm_i915_gem_execbuffer2 exec; + unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH]; + uint32_t *batch; + int i; + + reloc = calloc(sizeof(*reloc), nreloc); + gem_userptr(i915, space, total, true, userptr_flags, &rhandle); + + memset(obj, 0, sizeof(obj)); + obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B; + obj[1].handle = gem_create(i915, sz); + obj[1].relocation_count = nreloc; + obj[1].relocs_ptr = to_user_pointer(reloc); + + batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE); + + memset(&exec, 0, sizeof(exec)); + exec.buffer_count = 2; + exec.buffers_ptr = to_user_pointer(obj); + if (gen < 6) + exec.flags |= I915_EXEC_SECURE; + + for_each_engine(i915, exec.flags) { + /* First tweak the backing store through the write */ + i = 0; + obj[0].handle = whandle; + for (int n = 0; n < nreloc; n++) { + uint64_t offset; + + reloc[n].target_handle = obj[0].handle; + reloc[n].delta = rand() % (sz / 4) * 4; + reloc[n].offset = (i + 1) * sizeof(uint32_t); + reloc[n].presumed_offset = obj[0].offset; + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; + + offset = reloc[n].presumed_offset + reloc[n].delta; + + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); + if (gen >= 8) { + batch[++i] = offset; + batch[++i] = offset >> 32; + } else if (gen >= 4) { + batch[++i] = 0; + batch[++i] = offset; + reloc[n].offset += sizeof(uint32_t); + } else { + batch[i]--; + batch[++i] = offset; + } + batch[++i] = rand(); + i++; + } + batch[i] = MI_BATCH_BUFFER_END; + igt_assert(i * sizeof(uint32_t) < sz); + + gem_execbuf(i915, &exec); + gem_sync(i915, obj[0].handle); + SHA1(pages, sz, ref); + + igt_assert(memcmp(ref, orig, sizeof(ref))); + memcpy(orig, ref, sizeof(orig)); + + /* Now try the same through the read-only handle */ + i = 0; + obj[0].handle = rhandle; + for (int n = 0; n < nreloc; n++) { + uint64_t offset; + + reloc[n].target_handle = obj[0].handle; + reloc[n].delta = rand() % (total / 4) * 4; + reloc[n].offset = (i + 1) * sizeof(uint32_t); + reloc[n].presumed_offset = obj[0].offset; + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; + + offset = reloc[n].presumed_offset + reloc[n].delta; + + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); + if (gen >= 8) { + batch[++i] = offset; + batch[++i] = offset >> 32; + } else if (gen >= 4) { + batch[++i] = 0; + batch[++i] = offset; + reloc[n].offset += sizeof(uint32_t); + } else { + batch[i]--; + batch[++i] = offset; + } + batch[++i] = rand(); + i++; + } + batch[i] = MI_BATCH_BUFFER_END; + + gem_execbuf(i915, &exec); + gem_sync(i915, obj[0].handle); + SHA1(pages, sz, result); + + /* + * As the writes into the read-only GPU bo should fail, + * the SHA1 hash of the backing store should be + * unaffected. 
+ */ + igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0); + } + + munmap(batch, sz); + gem_close(i915, obj[1].handle); + gem_close(i915, rhandle); + } + igt_waitchildren(); + + munmap(space, total); + munmap(pages, sz); +} + +static jmp_buf sigjmp; +static void sigjmp_handler(int sig) +{ + siglongjmp(sigjmp, sig); +} + +static void test_readonly_mmap(int i915) +{ + unsigned char original[SHA_DIGEST_LENGTH]; + unsigned char result[SHA_DIGEST_LENGTH]; + uint32_t handle; + uint32_t sz; + void *pages; + void *ptr; + int sig; + + /* + * A quick check to ensure that we cannot circumvent the + * read-only nature of our memory by creating a GTT mmap into + * the pages. Imagine receiving a readonly SHM segment from + * another process, or a readonly file mmap, it must remain readonly + * on the GPU as well. + */ + + igt_require(igt_setup_clflush()); + + sz = 16 << 12; + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + igt_assert(pages != MAP_FAILED); + + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); + gem_set_caching(i915, handle, 0); + + memset(pages, 0xa5, sz); + igt_clflush_range(pages, sz); + SHA1(pages, sz, original); + + ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE); + igt_assert(ptr == NULL); + + ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ); + gem_close(i915, handle); + + /* Check that a write into the GTT readonly map fails */ + if (!(sig = sigsetjmp(sigjmp, 1))) { + signal(SIGBUS, sigjmp_handler); + signal(SIGSEGV, sigjmp_handler); + memset(ptr, 0x5a, sz); + igt_assert(0); + } + igt_assert_eq(sig, SIGSEGV); + + /* Check that we disallow removing the readonly protection */ + igt_assert(mprotect(ptr, sz, PROT_WRITE)); + if (!(sig = sigsetjmp(sigjmp, 1))) { + signal(SIGBUS, sigjmp_handler); + signal(SIGSEGV, sigjmp_handler); + memset(ptr, 0x5a, sz); + igt_assert(0); + } + igt_assert_eq(sig, SIGSEGV); + + /* A single read from the GTT pointer to prove that works */ + igt_assert_eq_u32(*(uint8_t 
*)ptr, 0xa5); + munmap(ptr, sz); + + /* Double check that the kernel did indeed not let any writes through */ + igt_clflush_range(pages, sz); + SHA1(pages, sz, result); + igt_assert(!memcmp(original, result, sizeof(original))); + + munmap(pages, sz); +} + +static void test_readonly_pwrite(int i915) +{ + unsigned char original[SHA_DIGEST_LENGTH]; + unsigned char result[SHA_DIGEST_LENGTH]; + uint32_t handle; + uint32_t sz; + void *pages; + + /* + * Same as for GTT mmapings, we cannot alone ourselves to + * circumvent readonly protection on a piece of memory via the + * pwrite ioctl. + */ + + igt_require(igt_setup_clflush()); + + sz = 16 << 12; + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); + igt_assert(pages != MAP_FAILED); + + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); + memset(pages, 0xa5, sz); + SHA1(pages, sz, original); + + for (int page = 0; page < 16; page++) { + char data[4096]; + + memset(data, page, sizeof(data)); + igt_assert_eq(__gem_write(i915, handle, page << 12, data, sizeof(data)), -EINVAL); + } + + gem_close(i915, handle); + + SHA1(pages, sz, result); + igt_assert(!memcmp(original, result, sizeof(original))); + + munmap(pages, sz); +} + static int test_usage_restrictions(int fd) { void *ptr; @@ -961,10 +1320,6 @@ static int test_usage_restrictions(int fd) ret = __gem_userptr(fd, (char *)ptr + 1, PAGE_SIZE - 1, 0, userptr_flags, &handle); igt_assert_neq(ret, 0); - /* Read-only not supported. 
*/ - ret = __gem_userptr(fd, (char *)ptr, PAGE_SIZE, 1, userptr_flags, &handle); - igt_assert_neq(ret, 0); - free(ptr); return 0; @@ -1502,6 +1857,15 @@ int main(int argc, char **argv) igt_subtest("dmabuf-unsync") test_dmabuf(); + igt_subtest("readonly-unsync") + test_readonly(fd); + + igt_subtest("readonly-mmap-unsync") + test_readonly_mmap(fd); + + igt_subtest("readonly-pwrite-unsync") + test_readonly_pwrite(fd); + for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; flags++) { igt_subtest_f("forked-unsync%s%s%s-%s", flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" : "", -- 2.18.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings 2018-06-29 7:44 ` Chris Wilson @ 2018-06-29 9:31 ` Tvrtko Ursulin -1 siblings, 0 replies; 18+ messages in thread From: Tvrtko Ursulin @ 2018-06-29 9:31 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev On 29/06/2018 08:44, Chris Wilson wrote: > Setup a userptr object that only has a read-only mapping back to a file > store (memfd). Then attempt to write into that mapping using the GPU and > assert that those writes do not land (while also writing via a writable > userptr mapping into the same memfd to verify that the GPU is working!) > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > --- > Minor commentary additions > --- > configure.ac | 1 + > lib/ioctl_wrappers.c | 4 +- > lib/ioctl_wrappers.h | 4 +- > lib/meson.build | 1 + > meson.build | 1 + > tests/Makefile.am | 4 +- > tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++- > 7 files changed, 377 insertions(+), 10 deletions(-) > > diff --git a/configure.ac b/configure.ac > index 1ee4e90e9..195963d4f 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) > PKG_CHECK_MODULES(KMOD, [libkmod]) > PKG_CHECK_MODULES(PROCPS, [libprocps]) > PKG_CHECK_MODULES(LIBUNWIND, [libunwind]) > +PKG_CHECK_MODULES(SSL, [openssl]) > PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no]) > > if test x$have_valgrind = xyes; then > diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c > index 79db44a8c..d5d2a4e4c 100644 > --- a/lib/ioctl_wrappers.c > +++ b/lib/ioctl_wrappers.c > @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state) > return madv.retained; > } > > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > +int __gem_userptr(int fd, void *ptr, 
uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > { > struct drm_i915_gem_userptr userptr; > > @@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui > * > * Returns userptr handle for the GEM object. > */ > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > { > igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0); > } > diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h > index b966f72c9..8e2cd380b 100644 > --- a/lib/ioctl_wrappers.h > +++ b/lib/ioctl_wrappers.h > @@ -133,8 +133,8 @@ struct local_i915_gem_userptr { > #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31) > uint32_t handle; > }; > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > void gem_sw_finish(int fd, uint32_t handle); > > diff --git a/lib/meson.build b/lib/meson.build > index 1a355414e..939167f91 100644 > --- a/lib/meson.build > +++ b/lib/meson.build > @@ -62,6 +62,7 @@ lib_deps = [ > pthreads, > math, > realtime, > + ssl, > ] > > if libdrm_intel.found() > diff --git a/meson.build b/meson.build > index 4d15d6238..638c01066 100644 > --- a/meson.build > +++ b/meson.build > @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10') > libkmod = dependency('libkmod') > libprocps = dependency('libprocps', required : true) > libunwind = dependency('libunwind', required : true) > +ssl = dependency('openssl', required : true) > > valgrind = null_dep > valgrindinfo = 'No' > diff --git a/tests/Makefile.am 
b/tests/Makefile.am > index f41ad5096..ba307b220 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > gem_tiled_swapping_LDADD = $(LDADD) -lpthread > prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > prime_self_import_LDADD = $(LDADD) -lpthread > -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > -gem_userptr_blits_LDADD = $(LDADD) -lpthread > +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS) > +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread > perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > > gem_eio_LDADD = $(LDADD) -lrt > diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c > index 7e3b6ef38..0fb7eba04 100644 > --- a/tests/gem_userptr_blits.c > +++ b/tests/gem_userptr_blits.c > @@ -43,13 +43,17 @@ > #include <fcntl.h> > #include <inttypes.h> > #include <errno.h> > +#include <setjmp.h> > #include <sys/stat.h> > #include <sys/time.h> > #include <sys/mman.h> > +#include <openssl/sha.h> > #include <signal.h> > #include <pthread.h> > #include <time.h> > > +#include <linux/memfd.h> > + > #include "drm.h" > #include "i915_drm.h" > > @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo) > return ret; > } > > +static void store_dword(int fd, uint32_t target, > + uint32_t offset, uint32_t value) > +{ > + const int gen = intel_gen(intel_get_drm_devid(fd)); > + struct drm_i915_gem_exec_object2 obj[2]; > + struct drm_i915_gem_relocation_entry reloc; > + struct drm_i915_gem_execbuffer2 execbuf; > + uint32_t batch[16]; > + int i; > + > + memset(&execbuf, 0, sizeof(execbuf)); > + execbuf.buffers_ptr = to_user_pointer(obj); > + execbuf.buffer_count = ARRAY_SIZE(obj); > + execbuf.flags = 0; > + if (gen < 6) > + execbuf.flags |= I915_EXEC_SECURE; > + > + memset(obj, 0, sizeof(obj)); > + obj[0].handle = target; > + obj[1].handle = gem_create(fd, 4096); > + > + memset(&reloc, 0, 
sizeof(reloc)); > + reloc.target_handle = obj[0].handle; > + reloc.presumed_offset = 0; > + reloc.offset = sizeof(uint32_t); > + reloc.delta = offset; > + reloc.read_domains = I915_GEM_DOMAIN_RENDER; > + reloc.write_domain = I915_GEM_DOMAIN_RENDER; > + obj[1].relocs_ptr = to_user_pointer(&reloc); > + obj[1].relocation_count = 1; > + > + i = 0; > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > + if (gen >= 8) { > + batch[++i] = offset; > + batch[++i] = 0; > + } else if (gen >= 4) { > + batch[++i] = 0; > + batch[++i] = offset; > + reloc.offset += sizeof(uint32_t); > + } else { > + batch[i]--; > + batch[++i] = offset; > + } > + batch[++i] = value; I still think to avoid too much code duplication: batch = __emit_store_dword(batch, gen, offset, value, &reloc.offset) Then from there see if something more could be extracted from the three call sites. > + batch[++i] = MI_BATCH_BUFFER_END; > + gem_write(fd, obj[1].handle, 0, batch, sizeof(batch)); > + gem_execbuf(fd, &execbuf); > + gem_close(fd, obj[1].handle); > +} > + > static uint32_t > create_userptr(int fd, uint32_t val, uint32_t *ptr) > { > @@ -941,6 +996,310 @@ static int test_dmabuf(void) > return 0; > } > > +static void test_readonly(int i915) > +{ > + unsigned char orig[SHA_DIGEST_LENGTH]; > + uint64_t aperture_size; > + uint32_t whandle, rhandle; > + size_t sz, total; > + void *pages, *space; > + int memfd; > + > + /* > + * A small batch of pages; small enough to cheaply check for stray > + * writes but large enough that we don't create too many VMA pointing > + * back to this set from the large arena. The limit on total number > + * of VMA for a process is 65,536 (at least on this kernel). > + * > + * We then write from the GPU through the large arena into the smaller > + * backing storage, which we can cheaply check to see if those writes > + * have landed (using a SHA1sum).
Repeating the same random GPU writes > + * though a read-only handle to confirm that this time the writes are through > + * discarded and the backing store unchanged. > + */ > + sz = 16 << 12; Here you aim not to exceed the above mentioned per-process VMA limit but please just express that in the code. Maybe re-order the code a bit: total = 2Gib sz = 2Gib / max_vmas_per_process * 2 aperture_size = ... total = round_down Then proceed with allocation etc. > + memfd = memfd_create("pages", 0); > + igt_require(memfd != -1); > + igt_require(ftruncate(memfd, sz) == 0); > + > + pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0); > + igt_assert(pages != MAP_FAILED); > + > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0); > + gem_close(i915, rhandle); > + > + gem_userptr(i915, pages, sz, false, userptr_flags, &whandle); > + /* From your reply: """ the largest offset we can use is 4G, and we can't use the full range as we need some extra room for batches, and we can't use the full VMA limit without serious slow down and risk of exhaustion. Then sticking to a pot. """ */ > + total = 2048ull << 20; > + aperture_size = gem_aperture_size(i915) / 2; > + if (aperture_size < total) > + total = aperture_size; > + total = total / sz * sz; > + igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n", > + total, total >> 12, sz >> 12); > + > + /* Create an arena all pointing to the same set of pages */ > + space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0); Why MAP_SHARED? > + igt_require(space != MAP_FAILED); > + for (size_t offset = 0; offset < total; offset += sz) { > + igt_assert(mmap(space + offset, sz, > + PROT_WRITE, MAP_SHARED | MAP_FIXED, > + memfd, 0) != MAP_FAILED); > + *(uint32_t *)(space + offset) = offset; AFAIU: First write instantiates the backing store, well, one page of it I guess. Depending how memfd works I guess. But mlock later will do all of it. 
> + } > + igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz)); ... and this checks that the arena is made up from repeating chunks. (Checking that the signature written into the last chunk is mirrored in the first one.) > + igt_assert(mlock(space, total) == 0); So this allocates all 64KiB definitely. > + close(memfd); > + > + /* Check we can create a normal userptr bo wrapping the wrapper */ > + gem_userptr(i915, space, total, false, userptr_flags, &rhandle); This is not read-only so rhandle is a bit misleading. Why do you btw create the whandle so early on and not just here? Hmm... whandle is chunk size, rhandle is arena size.. so the two loops below are different in that respect. Why is that? > + gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0); > + for (size_t offset = 0; offset < total; offset += sz) > + store_dword(i915, rhandle, offset + 4, offset / sz); > + gem_sync(i915, rhandle); I did not get your last reply here - once store dwords have completed and you proceed to check the memory via CPU PTEs - do you need to move the userptr bo back to the CPU domain so any flushes would happen? > + igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz)); > + igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1)); I really think comment explaining the layout and which side writes at which offset would be beneficial. 
> + gem_close(i915, rhandle); > + > + /* Now enforce read-only henceforth */ > + igt_assert(mprotect(space, total, PROT_READ) == 0); > + > + SHA1(pages, sz, orig); > + igt_fork(child, 1) { > + const int gen = intel_gen(intel_get_drm_devid(i915)); > + const int nreloc = 1024; > + struct drm_i915_gem_relocation_entry *reloc; > + struct drm_i915_gem_exec_object2 obj[2]; > + struct drm_i915_gem_execbuffer2 exec; > + unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH]; > + uint32_t *batch; > + int i; > + > + reloc = calloc(sizeof(*reloc), nreloc); > + gem_userptr(i915, space, total, true, userptr_flags, &rhandle); > + > + memset(obj, 0, sizeof(obj)); > + obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B; > + obj[1].handle = gem_create(i915, sz); I didn't get your previous reply. This is the batch buffer, right? So the size needed is related to the number of store dwords + bbend you need to emit, rather than sz, no? And nreloc is an arbitrary subset of sz / sizeof(uint32_t), right? So maybe: const int nreloc = sz / sizeof(uint32_t) / 16; /* arbitrary sub-size */ ... obj[1].handle = gem_create(i915, sizeof(uint32_t) + nreloc * sizeof(one_store_dword_sz)); Or am I missing something?
> + obj[1].relocation_count = nreloc; > + obj[1].relocs_ptr = to_user_pointer(reloc); > + > + batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE); > + > + memset(&exec, 0, sizeof(exec)); > + exec.buffer_count = 2; > + exec.buffers_ptr = to_user_pointer(obj); > + if (gen < 6) > + exec.flags |= I915_EXEC_SECURE; > + > + for_each_engine(i915, exec.flags) { > + /* First tweak the backing store through the write */ > + i = 0; > + obj[0].handle = whandle; > + for (int n = 0; n < nreloc; n++) { > + uint64_t offset; > + > + reloc[n].target_handle = obj[0].handle; > + reloc[n].delta = rand() % (sz / 4) * 4; > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > + reloc[n].presumed_offset = obj[0].offset; > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; How about: __fill_reloc(&reloc, &obj[0], delta, offset) ? > + > + offset = reloc[n].presumed_offset + reloc[n].delta; > + > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > + if (gen >= 8) { > + batch[++i] = offset; > + batch[++i] = offset >> 32; > + } else if (gen >= 4) { > + batch[++i] = 0; > + batch[++i] = offset; > + reloc[n].offset += sizeof(uint32_t); > + } else { > + batch[i]--; > + batch[++i] = offset; > + } > + batch[++i] = rand(); > + i++; > + } > + batch[i] = MI_BATCH_BUFFER_END; > + igt_assert(i * sizeof(uint32_t) < sz); > + > + gem_execbuf(i915, &exec); > + gem_sync(i915, obj[0].handle); > + SHA1(pages, sz, ref); > + > + igt_assert(memcmp(ref, orig, sizeof(ref))); > + memcpy(orig, ref, sizeof(orig)); > + > + /* Now try the same through the read-only handle */ > + i = 0; > + obj[0].handle = rhandle; > + for (int n = 0; n < nreloc; n++) { > + uint64_t offset; > + > + reloc[n].target_handle = obj[0].handle; > + reloc[n].delta = rand() % (total / 4) * 4; > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > + reloc[n].presumed_offset = obj[0].offset; > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > + reloc[n].write_domain = 
I915_GEM_DOMAIN_RENDER; > + > + offset = reloc[n].presumed_offset + reloc[n].delta; > + > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > + if (gen >= 8) { > + batch[++i] = offset; > + batch[++i] = offset >> 32; > + } else if (gen >= 4) { > + batch[++i] = 0; > + batch[++i] = offset; > + reloc[n].offset += sizeof(uint32_t); > + } else { > + batch[i]--; > + batch[++i] = offset; > + } > + batch[++i] = rand(); > + i++; > + } > + batch[i] = MI_BATCH_BUFFER_END; > + > + gem_execbuf(i915, &exec); > + gem_sync(i915, obj[0].handle); > + SHA1(pages, sz, result); > + > + /* > + * As the writes into the read-only GPU bo should fail, > + * the SHA1 hash of the backing store should be > + * unaffected. > + */ > + igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0); > + } > + > + munmap(batch, sz); > + gem_close(i915, obj[1].handle); > + gem_close(i915, rhandle); > + } > + igt_waitchildren(); > + > + munmap(space, total); > + munmap(pages, sz); > +} > + > +static jmp_buf sigjmp; > +static void sigjmp_handler(int sig) > +{ > + siglongjmp(sigjmp, sig); > +} > + > +static void test_readonly_mmap(int i915) > +{ > + unsigned char original[SHA_DIGEST_LENGTH]; > + unsigned char result[SHA_DIGEST_LENGTH]; > + uint32_t handle; > + uint32_t sz; > + void *pages; > + void *ptr; > + int sig; > + > + /* > + * A quick check to ensure that we cannot circumvent the > + * read-only nature of our memory by creating a GTT mmap into > + * the pages. Imagine receiving a readonly SHM segment from > + * another process, or a readonly file mmap, it must remain readonly > + * on the GPU as well. 
> + */ > + > + igt_require(igt_setup_clflush()); > + > + sz = 16 << 12; > + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); > + igt_assert(pages != MAP_FAILED); > + > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); > + gem_set_caching(i915, handle, 0); > + > + memset(pages, 0xa5, sz); > + igt_clflush_range(pages, sz); Please add comment saying why it is needed. > + SHA1(pages, sz, original); > + > + ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE); > + igt_assert(ptr == NULL); > + > + ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ); > + gem_close(i915, handle); > + > + /* Check that a write into the GTT readonly map fails */ > + if (!(sig = sigsetjmp(sigjmp, 1))) { > + signal(SIGBUS, sigjmp_handler); > + signal(SIGSEGV, sigjmp_handler); > + memset(ptr, 0x5a, sz); > + igt_assert(0); > + } > + igt_assert_eq(sig, SIGSEGV); > + > + /* Check that we disallow removing the readonly protection */ > + igt_assert(mprotect(ptr, sz, PROT_WRITE)); > + if (!(sig = sigsetjmp(sigjmp, 1))) { Continuing from previous reply - there is no longjmp so I don't know who will jump here. Maybe it is just me since I am not familiar with the facility but I still have a feeling comment on high level setup here is warranted. 
Regards, Tvrtko > + signal(SIGBUS, sigjmp_handler); > + signal(SIGSEGV, sigjmp_handler); > + memset(ptr, 0x5a, sz); > + igt_assert(0); > + } > + igt_assert_eq(sig, SIGSEGV); > + > + /* A single read from the GTT pointer to prove that works */ > + igt_assert_eq_u32(*(uint8_t *)ptr, 0xa5); > + munmap(ptr, sz); > + > + /* Double check that the kernel did indeed not let any writes through */ > + igt_clflush_range(pages, sz); > + SHA1(pages, sz, result); > + igt_assert(!memcmp(original, result, sizeof(original))); > + > + munmap(pages, sz); > +} > + > +static void test_readonly_pwrite(int i915) > +{ > + unsigned char original[SHA_DIGEST_LENGTH]; > + unsigned char result[SHA_DIGEST_LENGTH]; > + uint32_t handle; > + uint32_t sz; > + void *pages; > + > + /* > + * Same as for GTT mmappings, we cannot allow ourselves to > + * circumvent readonly protection on a piece of memory via the > + * pwrite ioctl. > + */ > + > + igt_require(igt_setup_clflush()); > + > + sz = 16 << 12; > + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); > + igt_assert(pages != MAP_FAILED); > + > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); > + memset(pages, 0xa5, sz); > + SHA1(pages, sz, original); > + > + for (int page = 0; page < 16; page++) { > + char data[4096]; > + > + memset(data, page, sizeof(data)); > + igt_assert_eq(__gem_write(i915, handle, page << 12, data, sizeof(data)), -EINVAL); > + } > + > + gem_close(i915, handle); > + > + SHA1(pages, sz, result); > + igt_assert(!memcmp(original, result, sizeof(original))); > + > + munmap(pages, sz); > +} > + > static int test_usage_restrictions(int fd) > { > void *ptr; > @@ -961,10 +1320,6 @@ static int test_usage_restrictions(int fd) > ret = __gem_userptr(fd, (char *)ptr + 1, PAGE_SIZE - 1, 0, userptr_flags, &handle); > igt_assert_neq(ret, 0); > > - /* Read-only not supported.
*/ > - ret = __gem_userptr(fd, (char *)ptr, PAGE_SIZE, 1, userptr_flags, &handle); > - igt_assert_neq(ret, 0); > - > free(ptr); > > return 0; > @@ -1502,6 +1857,15 @@ int main(int argc, char **argv) > igt_subtest("dmabuf-unsync") > test_dmabuf(); > > + igt_subtest("readonly-unsync") > + test_readonly(fd); > + > + igt_subtest("readonly-mmap-unsync") > + test_readonly_mmap(fd); > + > + igt_subtest("readonly-pwrite-unsync") > + test_readonly_pwrite(fd); > + > for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; flags++) { > igt_subtest_f("forked-unsync%s%s%s-%s", > flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" : "", > _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings @ 2018-06-29 9:31 ` Tvrtko Ursulin 0 siblings, 0 replies; 18+ messages in thread From: Tvrtko Ursulin @ 2018-06-29 9:31 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev On 29/06/2018 08:44, Chris Wilson wrote: > Setup a userptr object that only has a read-only mapping back to a file > store (memfd). Then attempt to write into that mapping using the GPU and > assert that those writes do not land (while also writing via a writable > userptr mapping into the same memfd to verify that the GPU is working!) > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > --- > Minor commentary additions > --- > configure.ac | 1 + > lib/ioctl_wrappers.c | 4 +- > lib/ioctl_wrappers.h | 4 +- > lib/meson.build | 1 + > meson.build | 1 + > tests/Makefile.am | 4 +- > tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++- > 7 files changed, 377 insertions(+), 10 deletions(-) > > diff --git a/configure.ac b/configure.ac > index 1ee4e90e9..195963d4f 100644 > --- a/configure.ac > +++ b/configure.ac > @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) > PKG_CHECK_MODULES(KMOD, [libkmod]) > PKG_CHECK_MODULES(PROCPS, [libprocps]) > PKG_CHECK_MODULES(LIBUNWIND, [libunwind]) > +PKG_CHECK_MODULES(SSL, [openssl]) > PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no]) > > if test x$have_valgrind = xyes; then > diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c > index 79db44a8c..d5d2a4e4c 100644 > --- a/lib/ioctl_wrappers.c > +++ b/lib/ioctl_wrappers.c > @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state) > return madv.retained; > } > > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t 
*handle) > { > struct drm_i915_gem_userptr userptr; > > @@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui > * > * Returns userptr handle for the GEM object. > */ > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > { > igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0); > } > diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h > index b966f72c9..8e2cd380b 100644 > --- a/lib/ioctl_wrappers.h > +++ b/lib/ioctl_wrappers.h > @@ -133,8 +133,8 @@ struct local_i915_gem_userptr { > #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31) > uint32_t handle; > }; > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > void gem_sw_finish(int fd, uint32_t handle); > > diff --git a/lib/meson.build b/lib/meson.build > index 1a355414e..939167f91 100644 > --- a/lib/meson.build > +++ b/lib/meson.build > @@ -62,6 +62,7 @@ lib_deps = [ > pthreads, > math, > realtime, > + ssl, > ] > > if libdrm_intel.found() > diff --git a/meson.build b/meson.build > index 4d15d6238..638c01066 100644 > --- a/meson.build > +++ b/meson.build > @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10') > libkmod = dependency('libkmod') > libprocps = dependency('libprocps', required : true) > libunwind = dependency('libunwind', required : true) > +ssl = dependency('openssl', required : true) > > valgrind = null_dep > valgrindinfo = 'No' > diff --git a/tests/Makefile.am b/tests/Makefile.am > index f41ad5096..ba307b220 100644 
> --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > gem_tiled_swapping_LDADD = $(LDADD) -lpthread > prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > prime_self_import_LDADD = $(LDADD) -lpthread > -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > -gem_userptr_blits_LDADD = $(LDADD) -lpthread > +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS) > +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread > perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > > gem_eio_LDADD = $(LDADD) -lrt > diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c > index 7e3b6ef38..0fb7eba04 100644 > --- a/tests/gem_userptr_blits.c > +++ b/tests/gem_userptr_blits.c > @@ -43,13 +43,17 @@ > #include <fcntl.h> > #include <inttypes.h> > #include <errno.h> > +#include <setjmp.h> > #include <sys/stat.h> > #include <sys/time.h> > #include <sys/mman.h> > +#include <openssl/sha.h> > #include <signal.h> > #include <pthread.h> > #include <time.h> > > +#include <linux/memfd.h> > + > #include "drm.h" > #include "i915_drm.h" > > @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo) > return ret; > } > > +static void store_dword(int fd, uint32_t target, > + uint32_t offset, uint32_t value) > +{ > + const int gen = intel_gen(intel_get_drm_devid(fd)); > + struct drm_i915_gem_exec_object2 obj[2]; > + struct drm_i915_gem_relocation_entry reloc; > + struct drm_i915_gem_execbuffer2 execbuf; > + uint32_t batch[16]; > + int i; > + > + memset(&execbuf, 0, sizeof(execbuf)); > + execbuf.buffers_ptr = to_user_pointer(obj); > + execbuf.buffer_count = ARRAY_SIZE(obj); > + execbuf.flags = 0; > + if (gen < 6) > + execbuf.flags |= I915_EXEC_SECURE; > + > + memset(obj, 0, sizeof(obj)); > + obj[0].handle = target; > + obj[1].handle = gem_create(fd, 4096); > + > + memset(&reloc, 0, sizeof(reloc)); > + reloc.target_handle = obj[0].handle; > + 
reloc.presumed_offset = 0; > + reloc.offset = sizeof(uint32_t); > + reloc.delta = offset; > + reloc.read_domains = I915_GEM_DOMAIN_RENDER; > + reloc.write_domain = I915_GEM_DOMAIN_RENDER; > + obj[1].relocs_ptr = to_user_pointer(&reloc); > + obj[1].relocation_count = 1; > + > + i = 0; > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > + if (gen >= 8) { > + batch[++i] = offset; > + batch[++i] = 0; > + } else if (gen >= 4) { > + batch[++i] = 0; > + batch[++i] = offset; > + reloc.offset += sizeof(uint32_t); > + } else { > + batch[i]--; > + batch[++i] = offset; > + } > + batch[++i] = value; I still think to avoid too much code duplication: batch = __emit_store_dword(batch, gen, offset, value, &reloc.offset) Then from there see if something more could be extracted from the three call sites. > + batch[++i] = MI_BATCH_BUFFER_END; > + gem_write(fd, obj[1].handle, 0, batch, sizeof(batch)); > + gem_execbuf(fd, &execbuf); > + gem_close(fd, obj[1].handle); > +} > + > static uint32_t > create_userptr(int fd, uint32_t val, uint32_t *ptr) > { > @@ -941,6 +996,310 @@ static int test_dmabuf(void) > return 0; > } > > +static void test_readonly(int i915) > +{ > + unsigned char orig[SHA_DIGEST_LENGTH]; > + uint64_t aperture_size; > + uint32_t whandle, rhandle; > + size_t sz, total; > + void *pages, *space; > + int memfd; > + > + /* > + * A small batch of pages; small enough to cheaply check for stray > + * writes but large enough that we don't create too many VMA pointing > + * back to this set from the large arena. The limit on total number > + * of VMA for a process is 65,536 (at least on this kernel). > + * > + * We then write from the GPU through the large arena into the smaller > + * backing storage, which we can cheaply check to see if those writes > + * have landed (using a SHA1sum). Repeating the same random GPU writes > + * though a read-only handle to confirm that this time the writes are through > + * discarded and the backing store unchanged.
> + */ > + sz = 16 << 12; Here you aim not to exceed the above mentioned per-process VMA limit but please just express that in the code. Maybe re-order the code a bit: total = 2Gib sz = 2Gib / max_vmas_per_process * 2 aperture_size = ... total = round_down Then proceed with allocation etc. > + memfd = memfd_create("pages", 0); > + igt_require(memfd != -1); > + igt_require(ftruncate(memfd, sz) == 0); > + > + pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0); > + igt_assert(pages != MAP_FAILED); > + > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0); > + gem_close(i915, rhandle); > + > + gem_userptr(i915, pages, sz, false, userptr_flags, &whandle); > + /* From your reply: """ the largest offset we can use is 4G, and we can't use the full range as we need some extra room for batches, and we can't use the full VMA limit without serious slow down and risk of exhaustion. Then sticking to a pot. """ */ > + total = 2048ull << 20; > + aperture_size = gem_aperture_size(i915) / 2; > + if (aperture_size < total) > + total = aperture_size; > + total = total / sz * sz; > + igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n", > + total, total >> 12, sz >> 12); > + > + /* Create an arena all pointing to the same set of pages */ > + space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0); Why MAP_SHARED? > + igt_require(space != MAP_FAILED); > + for (size_t offset = 0; offset < total; offset += sz) { > + igt_assert(mmap(space + offset, sz, > + PROT_WRITE, MAP_SHARED | MAP_FIXED, > + memfd, 0) != MAP_FAILED); > + *(uint32_t *)(space + offset) = offset; AFAIU: First write instantiates the backing store, well, one page of it I guess. Depending how memfd works I guess. But mlock later will do all of it. > + } > + igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz)); ... and this checks that the arena is made up from repeating chunks. 
(Checking that the signature written into the last chunk is mirrored in the first one.) > + igt_assert(mlock(space, total) == 0); So this allocates all 64KiB definitely. > + close(memfd); > + > + /* Check we can create a normal userptr bo wrapping the wrapper */ > + gem_userptr(i915, space, total, false, userptr_flags, &rhandle); This is not read-only so rhandle is a bit misleading. Why do you btw create the whandle so early on and not just here? Hmm... whandle is chunk size, rhandle is arena size.. so the two loops below are different in that respect. Why is that? > + gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0); > + for (size_t offset = 0; offset < total; offset += sz) > + store_dword(i915, rhandle, offset + 4, offset / sz); > + gem_sync(i915, rhandle); I did not get your last reply here - once store dwords have completed and you proceed to check the memory via CPU PTEs - do you need to move the userptr bo back to the CPU domain so any flushes would happen? > + igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz)); > + igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1)); I really think comment explaining the layout and which side writes at which offset would be beneficial. 
> + gem_close(i915, rhandle); > + > + /* Now enforce read-only henceforth */ > + igt_assert(mprotect(space, total, PROT_READ) == 0); > + > + SHA1(pages, sz, orig); > + igt_fork(child, 1) { > + const int gen = intel_gen(intel_get_drm_devid(i915)); > + const int nreloc = 1024; > + struct drm_i915_gem_relocation_entry *reloc; > + struct drm_i915_gem_exec_object2 obj[2]; > + struct drm_i915_gem_execbuffer2 exec; > + unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH]; > + uint32_t *batch; > + int i; > + > + reloc = calloc(sizeof(*reloc), nreloc); > + gem_userptr(i915, space, total, true, userptr_flags, &rhandle); > + > + memset(obj, 0, sizeof(obj)); > + obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B; > + obj[1].handle = gem_create(i915, sz); I didn't get your previous reply. This is the batch buffer right? So the size needed is relating to the number of store dword + bbend you need to emit, rather than sz, no? And nreloc is arbitrary subset of sz / sizeof(uint32_t), right? So maybe: const int nreloc = sz / sizeof(uint32_t) / 16; /* arbitrary sub-size */ ... obj[1].handle = gem_create(i915, sizeof(uint32_t) + nreloc * sizeof(one_store_dword_sz)); Or I am missing something? 
> + obj[1].relocation_count = nreloc; > + obj[1].relocs_ptr = to_user_pointer(reloc); > + > + batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE); > + > + memset(&exec, 0, sizeof(exec)); > + exec.buffer_count = 2; > + exec.buffers_ptr = to_user_pointer(obj); > + if (gen < 6) > + exec.flags |= I915_EXEC_SECURE; > + > + for_each_engine(i915, exec.flags) { > + /* First tweak the backing store through the write */ > + i = 0; > + obj[0].handle = whandle; > + for (int n = 0; n < nreloc; n++) { > + uint64_t offset; > + > + reloc[n].target_handle = obj[0].handle; > + reloc[n].delta = rand() % (sz / 4) * 4; > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > + reloc[n].presumed_offset = obj[0].offset; > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; How about: __fill_reloc(&reloc, &obj[0], delta, offset) ? > + > + offset = reloc[n].presumed_offset + reloc[n].delta; > + > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > + if (gen >= 8) { > + batch[++i] = offset; > + batch[++i] = offset >> 32; > + } else if (gen >= 4) { > + batch[++i] = 0; > + batch[++i] = offset; > + reloc[n].offset += sizeof(uint32_t); > + } else { > + batch[i]--; > + batch[++i] = offset; > + } > + batch[++i] = rand(); > + i++; > + } > + batch[i] = MI_BATCH_BUFFER_END; > + igt_assert(i * sizeof(uint32_t) < sz); > + > + gem_execbuf(i915, &exec); > + gem_sync(i915, obj[0].handle); > + SHA1(pages, sz, ref); > + > + igt_assert(memcmp(ref, orig, sizeof(ref))); > + memcpy(orig, ref, sizeof(orig)); > + > + /* Now try the same through the read-only handle */ > + i = 0; > + obj[0].handle = rhandle; > + for (int n = 0; n < nreloc; n++) { > + uint64_t offset; > + > + reloc[n].target_handle = obj[0].handle; > + reloc[n].delta = rand() % (total / 4) * 4; > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > + reloc[n].presumed_offset = obj[0].offset; > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > + reloc[n].write_domain = 
I915_GEM_DOMAIN_RENDER; > + > + offset = reloc[n].presumed_offset + reloc[n].delta; > + > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > + if (gen >= 8) { > + batch[++i] = offset; > + batch[++i] = offset >> 32; > + } else if (gen >= 4) { > + batch[++i] = 0; > + batch[++i] = offset; > + reloc[n].offset += sizeof(uint32_t); > + } else { > + batch[i]--; > + batch[++i] = offset; > + } > + batch[++i] = rand(); > + i++; > + } > + batch[i] = MI_BATCH_BUFFER_END; > + > + gem_execbuf(i915, &exec); > + gem_sync(i915, obj[0].handle); > + SHA1(pages, sz, result); > + > + /* > + * As the writes into the read-only GPU bo should fail, > + * the SHA1 hash of the backing store should be > + * unaffected. > + */ > + igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0); > + } > + > + munmap(batch, sz); > + gem_close(i915, obj[1].handle); > + gem_close(i915, rhandle); > + } > + igt_waitchildren(); > + > + munmap(space, total); > + munmap(pages, sz); > +} > + > +static jmp_buf sigjmp; > +static void sigjmp_handler(int sig) > +{ > + siglongjmp(sigjmp, sig); > +} > + > +static void test_readonly_mmap(int i915) > +{ > + unsigned char original[SHA_DIGEST_LENGTH]; > + unsigned char result[SHA_DIGEST_LENGTH]; > + uint32_t handle; > + uint32_t sz; > + void *pages; > + void *ptr; > + int sig; > + > + /* > + * A quick check to ensure that we cannot circumvent the > + * read-only nature of our memory by creating a GTT mmap into > + * the pages. Imagine receiving a readonly SHM segment from > + * another process, or a readonly file mmap, it must remain readonly > + * on the GPU as well. 
> + */ > + > + igt_require(igt_setup_clflush()); > + > + sz = 16 << 12; > + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); > + igt_assert(pages != MAP_FAILED); > + > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); > + gem_set_caching(i915, handle, 0); > + > + memset(pages, 0xa5, sz); > + igt_clflush_range(pages, sz); Please add comment saying why it is needed. > + SHA1(pages, sz, original); > + > + ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE); > + igt_assert(ptr == NULL); > + > + ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ); > + gem_close(i915, handle); > + > + /* Check that a write into the GTT readonly map fails */ > + if (!(sig = sigsetjmp(sigjmp, 1))) { > + signal(SIGBUS, sigjmp_handler); > + signal(SIGSEGV, sigjmp_handler); > + memset(ptr, 0x5a, sz); > + igt_assert(0); > + } > + igt_assert_eq(sig, SIGSEGV); > + > + /* Check that we disallow removing the readonly protection */ > + igt_assert(mprotect(ptr, sz, PROT_WRITE)); > + if (!(sig = sigsetjmp(sigjmp, 1))) { Continuing from previous reply - there is no longjmp so I don't know who will jump here. Maybe it is just me since I am not familiar with the facility but I still have a feeling comment on high level setup here is warranted. 
Regards, Tvrtko > + signal(SIGBUS, sigjmp_handler); > + signal(SIGSEGV, sigjmp_handler); > + memset(ptr, 0x5a, sz); > + igt_assert(0); > + } > + igt_assert_eq(sig, SIGSEGV); > + > + /* A single read from the GTT pointer to prove that works */ > + igt_assert_eq_u32(*(uint8_t *)ptr, 0xa5); > + munmap(ptr, sz); > + > + /* Double check that the kernel did indeed not let any writes through */ > + igt_clflush_range(pages, sz); > + SHA1(pages, sz, result); > + igt_assert(!memcmp(original, result, sizeof(original))); > + > + munmap(pages, sz); > +} > + > +static void test_readonly_pwrite(int i915) > +{ > + unsigned char original[SHA_DIGEST_LENGTH]; > + unsigned char result[SHA_DIGEST_LENGTH]; > + uint32_t handle; > + uint32_t sz; > + void *pages; > + > + /* > + * Same as for GTT mmappings, we cannot allow ourselves to > + * circumvent readonly protection on a piece of memory via the > + * pwrite ioctl. > + */ > + > + igt_require(igt_setup_clflush()); > + > + sz = 16 << 12; > + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); > + igt_assert(pages != MAP_FAILED); > + > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); > + memset(pages, 0xa5, sz); > + SHA1(pages, sz, original); > + > + for (int page = 0; page < 16; page++) { > + char data[4096]; > + > + memset(data, page, sizeof(data)); > + igt_assert_eq(__gem_write(i915, handle, page << 12, data, sizeof(data)), -EINVAL); > + } > + > + gem_close(i915, handle); > + > + SHA1(pages, sz, result); > + igt_assert(!memcmp(original, result, sizeof(original))); > + > + munmap(pages, sz); > +} > + > static int test_usage_restrictions(int fd) > { > void *ptr; > @@ -961,10 +1320,6 @@ static int test_usage_restrictions(int fd) > ret = __gem_userptr(fd, (char *)ptr + 1, PAGE_SIZE - 1, 0, userptr_flags, &handle); > igt_assert_neq(ret, 0); > > - /* Read-only not supported.
*/ > - ret = __gem_userptr(fd, (char *)ptr, PAGE_SIZE, 1, userptr_flags, &handle); > - igt_assert_neq(ret, 0); > - > free(ptr); > > return 0; > @@ -1502,6 +1857,15 @@ int main(int argc, char **argv) > igt_subtest("dmabuf-unsync") > test_dmabuf(); > > + igt_subtest("readonly-unsync") > + test_readonly(fd); > + > + igt_subtest("readonly-mmap-unsync") > + test_readonly_mmap(fd); > + > + igt_subtest("readonly-pwrite-unsync") > + test_readonly_pwrite(fd); > + > for (unsigned flags = 0; flags < ALL_FORKING_EVICTIONS + 1; flags++) { > igt_subtest_f("forked-unsync%s%s%s-%s", > flags & FORKING_EVICTIONS_SWAPPING ? "-swapping" : "", > _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings 2018-06-29 9:31 ` Tvrtko Ursulin @ 2018-06-29 9:44 ` Chris Wilson -1 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-29 9:44 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev Quoting Tvrtko Ursulin (2018-06-29 10:31:23) > > On 29/06/2018 08:44, Chris Wilson wrote: > > Setup a userptr object that only has a read-only mapping back to a file > > store (memfd). Then attempt to write into that mapping using the GPU and > > assert that those writes do not land (while also writing via a writable > > userptr mapping into the same memfd to verify that the GPU is working!) > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > > --- > > Minor commentary additions > > --- > > configure.ac | 1 + > > lib/ioctl_wrappers.c | 4 +- > > lib/ioctl_wrappers.h | 4 +- > > lib/meson.build | 1 + > > meson.build | 1 + > > tests/Makefile.am | 4 +- > > tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++- > > 7 files changed, 377 insertions(+), 10 deletions(-) > > > > diff --git a/configure.ac b/configure.ac > > index 1ee4e90e9..195963d4f 100644 > > --- a/configure.ac > > +++ b/configure.ac > > @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) > > PKG_CHECK_MODULES(KMOD, [libkmod]) > > PKG_CHECK_MODULES(PROCPS, [libprocps]) > > PKG_CHECK_MODULES(LIBUNWIND, [libunwind]) > > +PKG_CHECK_MODULES(SSL, [openssl]) > > PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no]) > > > > if test x$have_valgrind = xyes; then > > diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c > > index 79db44a8c..d5d2a4e4c 100644 > > --- a/lib/ioctl_wrappers.c > > +++ b/lib/ioctl_wrappers.c > > @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state) > > return madv.retained; > > } > > > > -int 
__gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > > { > > struct drm_i915_gem_userptr userptr; > > > > @@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui > > * > > * Returns userptr handle for the GEM object. > > */ > > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > > { > > igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0); > > } > > diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h > > index b966f72c9..8e2cd380b 100644 > > --- a/lib/ioctl_wrappers.h > > +++ b/lib/ioctl_wrappers.h > > @@ -133,8 +133,8 @@ struct local_i915_gem_userptr { > > #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31) > > uint32_t handle; > > }; > > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > > > void gem_sw_finish(int fd, uint32_t handle); > > > > diff --git a/lib/meson.build b/lib/meson.build > > index 1a355414e..939167f91 100644 > > --- a/lib/meson.build > > +++ b/lib/meson.build > > @@ -62,6 +62,7 @@ lib_deps = [ > > pthreads, > > math, > > realtime, > > + ssl, > > ] > > > > if libdrm_intel.found() > > diff --git a/meson.build b/meson.build > > index 4d15d6238..638c01066 100644 > > --- a/meson.build > > +++ b/meson.build > > @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10') > > libkmod = dependency('libkmod') > > libprocps 
= dependency('libprocps', required : true) > > libunwind = dependency('libunwind', required : true) > > +ssl = dependency('openssl', required : true) > > > > valgrind = null_dep > > valgrindinfo = 'No' > > diff --git a/tests/Makefile.am b/tests/Makefile.am > > index f41ad5096..ba307b220 100644 > > --- a/tests/Makefile.am > > +++ b/tests/Makefile.am > > @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > gem_tiled_swapping_LDADD = $(LDADD) -lpthread > > prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > prime_self_import_LDADD = $(LDADD) -lpthread > > -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > -gem_userptr_blits_LDADD = $(LDADD) -lpthread > > +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS) > > +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread > > perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > > > > gem_eio_LDADD = $(LDADD) -lrt > > diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c > > index 7e3b6ef38..0fb7eba04 100644 > > --- a/tests/gem_userptr_blits.c > > +++ b/tests/gem_userptr_blits.c > > @@ -43,13 +43,17 @@ > > #include <fcntl.h> > > #include <inttypes.h> > > #include <errno.h> > > +#include <setjmp.h> > > #include <sys/stat.h> > > #include <sys/time.h> > > #include <sys/mman.h> > > +#include <openssl/sha.h> > > #include <signal.h> > > #include <pthread.h> > > #include <time.h> > > > > +#include <linux/memfd.h> > > + > > #include "drm.h" > > #include "i915_drm.h" > > > > @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo) > > return ret; > > } > > > > +static void store_dword(int fd, uint32_t target, > > + uint32_t offset, uint32_t value) > > +{ > > + const int gen = intel_gen(intel_get_drm_devid(fd)); > > + struct drm_i915_gem_exec_object2 obj[2]; > > + struct drm_i915_gem_relocation_entry reloc; > > + struct drm_i915_gem_execbuffer2 execbuf; > > + uint32_t batch[16]; > > + int i; > > + > > + 
memset(&execbuf, 0, sizeof(execbuf)); > > + execbuf.buffers_ptr = to_user_pointer(obj); > > + execbuf.buffer_count = ARRAY_SIZE(obj); > > + execbuf.flags = 0; > > + if (gen < 6) > > + execbuf.flags |= I915_EXEC_SECURE; > > + > > + memset(obj, 0, sizeof(obj)); > > + obj[0].handle = target; > > + obj[1].handle = gem_create(fd, 4096); > > + > > + memset(&reloc, 0, sizeof(reloc)); > > + reloc.target_handle = obj[0].handle; > > + reloc.presumed_offset = 0; > > + reloc.offset = sizeof(uint32_t); > > + reloc.delta = offset; > > + reloc.read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc.write_domain = I915_GEM_DOMAIN_RENDER; > > + obj[1].relocs_ptr = to_user_pointer(&reloc); > > + obj[1].relocation_count = 1; > > + > > + i = 0; > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = 0; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc.offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > + batch[++i] = value; > > I still think to avoid too much code duplication: > > batch = __emit_store_dword(batch, gen, offset, value, &reloc.offset) > > Then from there see if something more could be extracted form the three > call sites. 
> > > + batch[++i] = MI_BATCH_BUFFER_END; > > + gem_write(fd, obj[1].handle, 0, batch, sizeof(batch)); > > + gem_execbuf(fd, &execbuf); > > + gem_close(fd, obj[1].handle); > > +} > > + > > static uint32_t > > create_userptr(int fd, uint32_t val, uint32_t *ptr) > > { > > @@ -941,6 +996,310 @@ static int test_dmabuf(void) > > return 0; > > } > > > > +static void test_readonly(int i915) > > +{ > > + unsigned char orig[SHA_DIGEST_LENGTH]; > > + uint64_t aperture_size; > > + uint32_t whandle, rhandle; > > + size_t sz, total; > > + void *pages, *space; > > + int memfd; > > + > > + /* > > + * A small batch of pages; small enough to cheaply check for stray > > + * writes but large enough that we don't create too many VMA pointing > > + * back to this set from the large arena. The limit on total number > > + * of VMA for a process is 65,536 (at least on this kernel). > > + * > > + * We then write from the GPU through the large arena into the smaller > > + * backing storage, which we can cheaply check to see if those writes > > + * have landed (using a SHA1sum). Repeating the same random GPU writes > > + * though a read-only handle to confirm that this time the writes are > > through > > > + * discarded and the backing store unchanged. > > + */ > > + sz = 16 << 12; > > Here you aim not to exceed the above mentioned per-process VMA limit but > please just express that in the code. Maybe re-order the code a bit: > > total = 2Gib > sz = 2Gib / max_vmas_per_process * 2 > aperture_size = ... > total = round_down > > Then proceed with allocation etc. 
> > > + memfd = memfd_create("pages", 0); > > + igt_require(memfd != -1); > > + igt_require(ftruncate(memfd, sz) == 0); > > + > > + pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0); > > + igt_assert(pages != MAP_FAILED); > > + > > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0); > > + gem_close(i915, rhandle); > > + > > + gem_userptr(i915, pages, sz, false, userptr_flags, &whandle); > > + > > /* > From your reply: > """ > the largest offset we can use is 4G, and we can't use the full range > as we need some extra room for batches, and we can't use the full VMA > limit without serious slow down and risk of exhaustion. > Then sticking to a pot. > """ > */ > > > + total = 2048ull << 20; > > + aperture_size = gem_aperture_size(i915) / 2; > > + if (aperture_size < total) > > + total = aperture_size; > > + total = total / sz * sz; > > + igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n", > > + total, total >> 12, sz >> 12); > > + > > + /* Create an arena all pointing to the same set of pages */ > > + space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0); > > Why MAP_SHARED? fork. > > + igt_require(space != MAP_FAILED); > > + for (size_t offset = 0; offset < total; offset += sz) { > > + igt_assert(mmap(space + offset, sz, > > + PROT_WRITE, MAP_SHARED | MAP_FIXED, > > + memfd, 0) != MAP_FAILED); > > + *(uint32_t *)(space + offset) = offset; > > AFAIU: > > First write instantiates the backing store, well, one page of it I > guess. Depending how memfd works I guess. But mlock later will do all of it. > > > + } > > + igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz)); > > ... and this checks that the arena is made up from repeating chunks. > (Checking that the signature written into the last chunk is mirrored in > the first one.) > > > + igt_assert(mlock(space, total) == 0); > > So this allocates all 64KiB definitely. 
> > > + close(memfd); > > + > > + /* Check we can create a normal userptr bo wrapping the wrapper */ > > + gem_userptr(i915, space, total, false, userptr_flags, &rhandle); > > This is not read-only so rhandle is a bit misleading. Why do you btw > create the whandle so early on and not just here? Hmm... whandle is > chunk size, rhandle is arena size.. so the two loops below are different > in that respect. Why is that? Because the arena will be readonly, the backing store is writeable. > > + gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0); > > + for (size_t offset = 0; offset < total; offset += sz) > > + store_dword(i915, rhandle, offset + 4, offset / sz); > > + gem_sync(i915, rhandle); > > I did not get your last reply here - once store dwords have completed > and you proceed to check the memory via CPU PTEs - do you need to move > the userptr bo back to the CPU domain so any flushes would happen? rhandle, it's a new userptr that we want to verify we can populate. > > + igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz)); > > + igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1)); > > I really think comment explaining the layout and which side writes at > which offset would be beneficial. It's literally explained in the code in this block and never used again. 
> > + gem_close(i915, rhandle); > > + > > + /* Now enforce read-only henceforth */ > > + igt_assert(mprotect(space, total, PROT_READ) == 0); > > + > > + SHA1(pages, sz, orig); > > + igt_fork(child, 1) { > > + const int gen = intel_gen(intel_get_drm_devid(i915)); > > + const int nreloc = 1024; > > + struct drm_i915_gem_relocation_entry *reloc; > > + struct drm_i915_gem_exec_object2 obj[2]; > > + struct drm_i915_gem_execbuffer2 exec; > > + unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH]; > > + uint32_t *batch; > > + int i; > > + > > + reloc = calloc(sizeof(*reloc), nreloc); > > + gem_userptr(i915, space, total, true, userptr_flags, &rhandle); > > + > > + memset(obj, 0, sizeof(obj)); > > + obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B; > > + obj[1].handle = gem_create(i915, sz); > > I didn't get your previous reply. This is the batch buffer right? So the > size needed is relating to the number of store dword + bbend you need to > emit, rather than sz, no? And nreloc is arbitrary subset of sz / > sizeof(uint32_t), right? > > So maybe: > > const int nreloc = sz / sizeof(uint32_t) / 16; /* arbitrary sub-size */ > ... > obj[1].handle = gem_create(i915, sizeof(uint32_t) + nreloc * > sizeof(one_store_dword_sz)); > > Or I am missing something? Just pointless. 
> > + obj[1].relocation_count = nreloc; > > + obj[1].relocs_ptr = to_user_pointer(reloc); > > + > > + batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE); > > + > > + memset(&exec, 0, sizeof(exec)); > > + exec.buffer_count = 2; > > + exec.buffers_ptr = to_user_pointer(obj); > > + if (gen < 6) > > + exec.flags |= I915_EXEC_SECURE; > > + > > + for_each_engine(i915, exec.flags) { > > + /* First tweak the backing store through the write */ > > + i = 0; > > + obj[0].handle = whandle; > > + for (int n = 0; n < nreloc; n++) { > > + uint64_t offset; > > + > > + reloc[n].target_handle = obj[0].handle; > > + reloc[n].delta = rand() % (sz / 4) * 4; > > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > > + reloc[n].presumed_offset = obj[0].offset; > > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; > > How about: > > __fill_reloc(&reloc, &obj[0], delta, offset) ? No. I really dislike functions whose only purpose is to obfuscate copying their arguments into the struct passed in. Because it just makes it harder to adapt in future, whereas I think this is quite clear as to how the batch is constructed. > > + > > + offset = reloc[n].presumed_offset + reloc[n].delta; > > + > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = offset >> 32; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc[n].offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > > + batch[++i] = rand(); > > + i++; > > + } > > + batch[i] = MI_BATCH_BUFFER_END; > > + igt_assert(i * sizeof(uint32_t) < sz); > > + > > + gem_execbuf(i915, &exec); > > + gem_sync(i915, obj[0].handle); > > + SHA1(pages, sz, ref); > > + > > + igt_assert(memcmp(ref, orig, sizeof(ref))); > > + memcpy(orig, ref, sizeof(orig)); > > + > > + /* Now try the same through the read-only handle */ > > + i = 0; > > + obj[0].handle = rhandle; > > + for (int n = 0; n < nreloc; n++) { > > + uint64_t offset; > > + > > + reloc[n].target_handle = obj[0].handle; > > + reloc[n].delta = rand() % (total / 4) * 4; > > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > > + reloc[n].presumed_offset = obj[0].offset; > > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; > > + > > + offset = reloc[n].presumed_offset + reloc[n].delta; > > + > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = offset >> 32; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc[n].offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > > + batch[++i] = rand(); > > + i++; > > + } > > + batch[i] = MI_BATCH_BUFFER_END; > > + > > + gem_execbuf(i915, &exec); > > + gem_sync(i915, obj[0].handle); > > + SHA1(pages, sz, result); > > + > > + /* > > + * As the writes into the read-only GPU bo should fail, > > + * the SHA1 hash of the backing store should be > > + * unaffected. 
> > + */ > > + igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0); > > + } > > + > > + munmap(batch, sz); > > + gem_close(i915, obj[1].handle); > > + gem_close(i915, rhandle); > > + } > > + igt_waitchildren(); > > + > > + munmap(space, total); > > + munmap(pages, sz); > > +} > > + > > +static jmp_buf sigjmp; > > +static void sigjmp_handler(int sig) > > +{ > > + siglongjmp(sigjmp, sig); > > +} > > + > > +static void test_readonly_mmap(int i915) > > +{ > > + unsigned char original[SHA_DIGEST_LENGTH]; > > + unsigned char result[SHA_DIGEST_LENGTH]; > > + uint32_t handle; > > + uint32_t sz; > > + void *pages; > > + void *ptr; > > + int sig; > > + > > + /* > > + * A quick check to ensure that we cannot circumvent the > > + * read-only nature of our memory by creating a GTT mmap into > > + * the pages. Imagine receiving a readonly SHM segment from > > + * another process, or a readonly file mmap, it must remain readonly > > + * on the GPU as well. > > + */ > > + > > + igt_require(igt_setup_clflush()); > > + > > + sz = 16 << 12; > > + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); > > + igt_assert(pages != MAP_FAILED); > > + > > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); > > + gem_set_caching(i915, handle, 0); > > + > > + memset(pages, 0xa5, sz); > > + igt_clflush_range(pages, sz); > > Please add comment saying why it is needed. Hmm, is it not obvious from context? 
> > + SHA1(pages, sz, original); > > + > > + ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE); > > + igt_assert(ptr == NULL); > > + > > + ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ); > > + gem_close(i915, handle); > > + > > + /* Check that a write into the GTT readonly map fails */ > > + if (!(sig = sigsetjmp(sigjmp, 1))) { > > + signal(SIGBUS, sigjmp_handler); > > + signal(SIGSEGV, sigjmp_handler); > > + memset(ptr, 0x5a, sz); > > + igt_assert(0); > > + } > > + igt_assert_eq(sig, SIGSEGV); > > + > > + /* Check that we disallow removing the readonly protection */ > > + igt_assert(mprotect(ptr, sz, PROT_WRITE)); > > + if (!(sig = sigsetjmp(sigjmp, 1))) { > > Continuing from previous reply - there is no longjmp so I don't know who > will jump here. Maybe it is just me since I am not familiar with the > facility but I still have a feeling comment on high level setup here is > warranted. Look at sigjmp_handler. -Chris _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings @ 2018-06-29 9:44 ` Chris Wilson 0 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-29 9:44 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev Quoting Tvrtko Ursulin (2018-06-29 10:31:23) > > On 29/06/2018 08:44, Chris Wilson wrote: > > Setup a userptr object that only has a read-only mapping back to a file > > store (memfd). Then attempt to write into that mapping using the GPU and > > assert that those writes do not land (while also writing via a writable > > userptr mapping into the same memfd to verify that the GPU is working!) > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > > --- > > Minor commentary additions > > --- > > configure.ac | 1 + > > lib/ioctl_wrappers.c | 4 +- > > lib/ioctl_wrappers.h | 4 +- > > lib/meson.build | 1 + > > meson.build | 1 + > > tests/Makefile.am | 4 +- > > tests/gem_userptr_blits.c | 372 +++++++++++++++++++++++++++++++++++++- > > 7 files changed, 377 insertions(+), 10 deletions(-) > > > > diff --git a/configure.ac b/configure.ac > > index 1ee4e90e9..195963d4f 100644 > > --- a/configure.ac > > +++ b/configure.ac > > @@ -125,6 +125,7 @@ PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10]) > > PKG_CHECK_MODULES(KMOD, [libkmod]) > > PKG_CHECK_MODULES(PROCPS, [libprocps]) > > PKG_CHECK_MODULES(LIBUNWIND, [libunwind]) > > +PKG_CHECK_MODULES(SSL, [openssl]) > > PKG_CHECK_MODULES(VALGRIND, [valgrind], [have_valgrind=yes], [have_valgrind=no]) > > > > if test x$have_valgrind = xyes; then > > diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c > > index 79db44a8c..d5d2a4e4c 100644 > > --- a/lib/ioctl_wrappers.c > > +++ b/lib/ioctl_wrappers.c > > @@ -869,7 +869,7 @@ int gem_madvise(int fd, uint32_t handle, int state) > > return madv.retained; > > } > > > > -int __gem_userptr(int fd, void *ptr, int size, int read_only, 
uint32_t flags, uint32_t *handle) > > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > > { > > struct drm_i915_gem_userptr userptr; > > > > @@ -898,7 +898,7 @@ int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, ui > > * > > * Returns userptr handle for the GEM object. > > */ > > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle) > > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle) > > { > > igt_assert_eq(__gem_userptr(fd, ptr, size, read_only, flags, handle), 0); > > } > > diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h > > index b966f72c9..8e2cd380b 100644 > > --- a/lib/ioctl_wrappers.h > > +++ b/lib/ioctl_wrappers.h > > @@ -133,8 +133,8 @@ struct local_i915_gem_userptr { > > #define LOCAL_I915_USERPTR_UNSYNCHRONIZED (1<<31) > > uint32_t handle; > > }; > > -void gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > > -int __gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t flags, uint32_t *handle); > > +void gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > +int __gem_userptr(int fd, void *ptr, uint64_t size, int read_only, uint32_t flags, uint32_t *handle); > > > > void gem_sw_finish(int fd, uint32_t handle); > > > > diff --git a/lib/meson.build b/lib/meson.build > > index 1a355414e..939167f91 100644 > > --- a/lib/meson.build > > +++ b/lib/meson.build > > @@ -62,6 +62,7 @@ lib_deps = [ > > pthreads, > > math, > > realtime, > > + ssl, > > ] > > > > if libdrm_intel.found() > > diff --git a/meson.build b/meson.build > > index 4d15d6238..638c01066 100644 > > --- a/meson.build > > +++ b/meson.build > > @@ -98,6 +98,7 @@ pciaccess = dependency('pciaccess', version : '>=0.10') > > libkmod = dependency('libkmod') > > libprocps = dependency('libprocps', required : true) > > libunwind = 
dependency('libunwind', required : true) > > +ssl = dependency('openssl', required : true) > > > > valgrind = null_dep > > valgrindinfo = 'No' > > diff --git a/tests/Makefile.am b/tests/Makefile.am > > index f41ad5096..ba307b220 100644 > > --- a/tests/Makefile.am > > +++ b/tests/Makefile.am > > @@ -126,8 +126,8 @@ gem_tiled_swapping_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > gem_tiled_swapping_LDADD = $(LDADD) -lpthread > > prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > prime_self_import_LDADD = $(LDADD) -lpthread > > -gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) > > -gem_userptr_blits_LDADD = $(LDADD) -lpthread > > +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS) $(SSL_CFLAGS) > > +gem_userptr_blits_LDADD = $(LDADD) $(SSL_LIBS) -lpthread > > perf_pmu_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la > > > > gem_eio_LDADD = $(LDADD) -lrt > > diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c > > index 7e3b6ef38..0fb7eba04 100644 > > --- a/tests/gem_userptr_blits.c > > +++ b/tests/gem_userptr_blits.c > > @@ -43,13 +43,17 @@ > > #include <fcntl.h> > > #include <inttypes.h> > > #include <errno.h> > > +#include <setjmp.h> > > #include <sys/stat.h> > > #include <sys/time.h> > > #include <sys/mman.h> > > +#include <openssl/sha.h> > > #include <signal.h> > > #include <pthread.h> > > #include <time.h> > > > > +#include <linux/memfd.h> > > + > > #include "drm.h" > > #include "i915_drm.h" > > > > @@ -238,6 +242,57 @@ blit(int fd, uint32_t dst, uint32_t src, uint32_t *all_bo, int n_bo) > > return ret; > > } > > > > +static void store_dword(int fd, uint32_t target, > > + uint32_t offset, uint32_t value) > > +{ > > + const int gen = intel_gen(intel_get_drm_devid(fd)); > > + struct drm_i915_gem_exec_object2 obj[2]; > > + struct drm_i915_gem_relocation_entry reloc; > > + struct drm_i915_gem_execbuffer2 execbuf; > > + uint32_t batch[16]; > > + int i; > > + > > + memset(&execbuf, 0, sizeof(execbuf)); > > + execbuf.buffers_ptr = 
to_user_pointer(obj); > > + execbuf.buffer_count = ARRAY_SIZE(obj); > > + execbuf.flags = 0; > > + if (gen < 6) > > + execbuf.flags |= I915_EXEC_SECURE; > > + > > + memset(obj, 0, sizeof(obj)); > > + obj[0].handle = target; > > + obj[1].handle = gem_create(fd, 4096); > > + > > + memset(&reloc, 0, sizeof(reloc)); > > + reloc.target_handle = obj[0].handle; > > + reloc.presumed_offset = 0; > > + reloc.offset = sizeof(uint32_t); > > + reloc.delta = offset; > > + reloc.read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc.write_domain = I915_GEM_DOMAIN_RENDER; > > + obj[1].relocs_ptr = to_user_pointer(&reloc); > > + obj[1].relocation_count = 1; > > + > > + i = 0; > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = 0; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc.offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > + batch[++i] = value; > > I still think to avoid too much code duplication: > > batch = __emit_store_dword(batch, gen, offset, value, &reloc.offset) > > Then from there see if something more could be extracted from the three > call sites. 
> > > + batch[++i] = MI_BATCH_BUFFER_END; > > + gem_write(fd, obj[1].handle, 0, batch, sizeof(batch)); > > + gem_execbuf(fd, &execbuf); > > + gem_close(fd, obj[1].handle); > > +} > > + > > static uint32_t > > create_userptr(int fd, uint32_t val, uint32_t *ptr) > > { > > @@ -941,6 +996,310 @@ static int test_dmabuf(void) > > return 0; > > } > > > > +static void test_readonly(int i915) > > +{ > > + unsigned char orig[SHA_DIGEST_LENGTH]; > > + uint64_t aperture_size; > > + uint32_t whandle, rhandle; > > + size_t sz, total; > > + void *pages, *space; > > + int memfd; > > + > > + /* > > + * A small batch of pages; small enough to cheaply check for stray > > + * writes but large enough that we don't create too many VMA pointing > > + * back to this set from the large arena. The limit on total number > > + * of VMA for a process is 65,536 (at least on this kernel). > > + * > > + * We then write from the GPU through the large arena into the smaller > > + * backing storage, which we can cheaply check to see if those writes > > + * have landed (using a SHA1sum). Repeating the same random GPU writes > > + * though a read-only handle to confirm that this time the writes are > > through > > > + * discarded and the backing store unchanged. > > + */ > > + sz = 16 << 12; > > Here you aim not to exceed the above mentioned per-process VMA limit but > please just express that in the code. Maybe re-order the code a bit: > > total = 2Gib > sz = 2Gib / max_vmas_per_process * 2 > aperture_size = ... > total = round_down > > Then proceed with allocation etc. 
> > > + memfd = memfd_create("pages", 0); > > + igt_require(memfd != -1); > > + igt_require(ftruncate(memfd, sz) == 0); > > + > > + pages = mmap(NULL, sz, PROT_WRITE, MAP_SHARED, memfd, 0); > > + igt_assert(pages != MAP_FAILED); > > + > > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &rhandle) == 0); > > + gem_close(i915, rhandle); > > + > > + gem_userptr(i915, pages, sz, false, userptr_flags, &whandle); > > + > > /* > From your reply: > """ > the largest offset we can use is 4G, and we can't use the full range > as we need some extra room for batches, and we can't use the full VMA > limit without serious slow down and risk of exhaustion. > Then sticking to a pot. > """ > */ > > > + total = 2048ull << 20; > > + aperture_size = gem_aperture_size(i915) / 2; > > + if (aperture_size < total) > > + total = aperture_size; > > + total = total / sz * sz; > > + igt_info("Using a %'zuB (%'zu pages) arena onto %zu pages\n", > > + total, total >> 12, sz >> 12); > > + > > + /* Create an arena all pointing to the same set of pages */ > > + space = mmap(NULL, total, PROT_READ, MAP_ANON | MAP_SHARED, -1, 0); > > Why MAP_SHARED? fork. > > + igt_require(space != MAP_FAILED); > > + for (size_t offset = 0; offset < total; offset += sz) { > > + igt_assert(mmap(space + offset, sz, > > + PROT_WRITE, MAP_SHARED | MAP_FIXED, > > + memfd, 0) != MAP_FAILED); > > + *(uint32_t *)(space + offset) = offset; > > AFAIU: > > First write instantiates the backing store, well, one page of it I > guess. Depending how memfd works I guess. But mlock later will do all of it. > > > + } > > + igt_assert_eq_u32(*(uint32_t *)pages, (uint32_t)(total - sz)); > > ... and this checks that the arena is made up from repeating chunks. > (Checking that the signature written into the last chunk is mirrored in > the first one.) > > > + igt_assert(mlock(space, total) == 0); > > So this allocates all 64KiB definitely. 
> > > + close(memfd); > > + > > + /* Check we can create a normal userptr bo wrapping the wrapper */ > > + gem_userptr(i915, space, total, false, userptr_flags, &rhandle); > > This is not read-only so rhandle is a bit misleading. Why do you btw > create the whandle so early on and not just here? Hmm... whandle is > chunk size, rhandle is arena size.. so the two loops below are different > in that respect. Why is that? Because the arena will be readonly, the backing store is writeable. > > + gem_set_domain(i915, rhandle, I915_GEM_DOMAIN_CPU, 0); > > + for (size_t offset = 0; offset < total; offset += sz) > > + store_dword(i915, rhandle, offset + 4, offset / sz); > > + gem_sync(i915, rhandle); > > I did not get your last reply here - once store dwords have completed > and you proceed to check the memory via CPU PTEs - do you need to move > the userptr bo back to the CPU domain so any flushes would happen? rhandle, it's a new userptr that we want to verify we can populate. > > + igt_assert_eq_u32(*(uint32_t *)(pages + 0), (uint32_t)(total - sz)); > > + igt_assert_eq_u32(*(uint32_t *)(pages + 4), (uint32_t)(total / sz - 1)); > > I really think comment explaining the layout and which side writes at > which offset would be beneficial. It's literally explained in the code in this block and never used again. 
> > + gem_close(i915, rhandle); > > + > > + /* Now enforce read-only henceforth */ > > + igt_assert(mprotect(space, total, PROT_READ) == 0); > > + > > + SHA1(pages, sz, orig); > > + igt_fork(child, 1) { > > + const int gen = intel_gen(intel_get_drm_devid(i915)); > > + const int nreloc = 1024; > > + struct drm_i915_gem_relocation_entry *reloc; > > + struct drm_i915_gem_exec_object2 obj[2]; > > + struct drm_i915_gem_execbuffer2 exec; > > + unsigned char ref[SHA_DIGEST_LENGTH], result[SHA_DIGEST_LENGTH]; > > + uint32_t *batch; > > + int i; > > + > > + reloc = calloc(sizeof(*reloc), nreloc); > > + gem_userptr(i915, space, total, true, userptr_flags, &rhandle); > > + > > + memset(obj, 0, sizeof(obj)); > > + obj[0].flags = LOCAL_EXEC_OBJECT_SUPPORTS_48B; > > + obj[1].handle = gem_create(i915, sz); > > I didn't get your previous reply. This is the batch buffer right? So the > size needed is relating to the number of store dword + bbend you need to > emit, rather than sz, no? And nreloc is arbitrary subset of sz / > sizeof(uint32_t), right? > > So maybe: > > const int nreloc = sz / sizeof(uint32_t) / 16; /* arbitrary sub-size */ > ... > obj[1].handle = gem_create(i915, sizeof(uint32_t) + nreloc * > sizeof(one_store_dword_sz)); > > Or I am missing something? Just pointless. 
> > + obj[1].relocation_count = nreloc; > > + obj[1].relocs_ptr = to_user_pointer(reloc); > > + > > + batch = gem_mmap__wc(i915, obj[1].handle, 0, sz, PROT_WRITE); > > + > > + memset(&exec, 0, sizeof(exec)); > > + exec.buffer_count = 2; > > + exec.buffers_ptr = to_user_pointer(obj); > > + if (gen < 6) > > + exec.flags |= I915_EXEC_SECURE; > > + > > + for_each_engine(i915, exec.flags) { > > + /* First tweak the backing store through the write */ > > + i = 0; > > + obj[0].handle = whandle; > > + for (int n = 0; n < nreloc; n++) { > > + uint64_t offset; > > + > > + reloc[n].target_handle = obj[0].handle; > > + reloc[n].delta = rand() % (sz / 4) * 4; > > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > > + reloc[n].presumed_offset = obj[0].offset; > > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; > > How about: > > __fill_reloc(&reloc, &obj[0], delta, offset) ? No. I really dislike functions whose only purpose is to obfuscate copying their arguments into the struct passed in. Because it just makes it harder to adapt in future, whereas I think this is quite clear as to how the batch is constructed. > > + > > + offset = reloc[n].presumed_offset + reloc[n].delta; > > + > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 
1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = offset >> 32; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc[n].offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > > + batch[++i] = rand(); > > + i++; > > + } > > + batch[i] = MI_BATCH_BUFFER_END; > > + igt_assert(i * sizeof(uint32_t) < sz); > > + > > + gem_execbuf(i915, &exec); > > + gem_sync(i915, obj[0].handle); > > + SHA1(pages, sz, ref); > > + > > + igt_assert(memcmp(ref, orig, sizeof(ref))); > > + memcpy(orig, ref, sizeof(orig)); > > + > > + /* Now try the same through the read-only handle */ > > + i = 0; > > + obj[0].handle = rhandle; > > + for (int n = 0; n < nreloc; n++) { > > + uint64_t offset; > > + > > + reloc[n].target_handle = obj[0].handle; > > + reloc[n].delta = rand() % (total / 4) * 4; > > + reloc[n].offset = (i + 1) * sizeof(uint32_t); > > + reloc[n].presumed_offset = obj[0].offset; > > + reloc[n].read_domains = I915_GEM_DOMAIN_RENDER; > > + reloc[n].write_domain = I915_GEM_DOMAIN_RENDER; > > + > > + offset = reloc[n].presumed_offset + reloc[n].delta; > > + > > + batch[i] = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0); > > + if (gen >= 8) { > > + batch[++i] = offset; > > + batch[++i] = offset >> 32; > > + } else if (gen >= 4) { > > + batch[++i] = 0; > > + batch[++i] = offset; > > + reloc[n].offset += sizeof(uint32_t); > > + } else { > > + batch[i]--; > > + batch[++i] = offset; > > + } > > + batch[++i] = rand(); > > + i++; > > + } > > + batch[i] = MI_BATCH_BUFFER_END; > > + > > + gem_execbuf(i915, &exec); > > + gem_sync(i915, obj[0].handle); > > + SHA1(pages, sz, result); > > + > > + /* > > + * As the writes into the read-only GPU bo should fail, > > + * the SHA1 hash of the backing store should be > > + * unaffected. 
> > + */ > > + igt_assert(memcmp(ref, result, SHA_DIGEST_LENGTH) == 0); > > + } > > + > > + munmap(batch, sz); > > + gem_close(i915, obj[1].handle); > > + gem_close(i915, rhandle); > > + } > > + igt_waitchildren(); > > + > > + munmap(space, total); > > + munmap(pages, sz); > > +} > > + > > +static jmp_buf sigjmp; > > +static void sigjmp_handler(int sig) > > +{ > > + siglongjmp(sigjmp, sig); > > +} > > + > > +static void test_readonly_mmap(int i915) > > +{ > > + unsigned char original[SHA_DIGEST_LENGTH]; > > + unsigned char result[SHA_DIGEST_LENGTH]; > > + uint32_t handle; > > + uint32_t sz; > > + void *pages; > > + void *ptr; > > + int sig; > > + > > + /* > > + * A quick check to ensure that we cannot circumvent the > > + * read-only nature of our memory by creating a GTT mmap into > > + * the pages. Imagine receiving a readonly SHM segment from > > + * another process, or a readonly file mmap, it must remain readonly > > + * on the GPU as well. > > + */ > > + > > + igt_require(igt_setup_clflush()); > > + > > + sz = 16 << 12; > > + pages = mmap(NULL, sz, PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); > > + igt_assert(pages != MAP_FAILED); > > + > > + igt_require(__gem_userptr(i915, pages, sz, true, userptr_flags, &handle) == 0); > > + gem_set_caching(i915, handle, 0); > > + > > + memset(pages, 0xa5, sz); > > + igt_clflush_range(pages, sz); > > Please add comment saying why it is needed. Hmm, is it not obvious from context? 
> > + SHA1(pages, sz, original); > > + > > + ptr = __gem_mmap__gtt(i915, handle, sz, PROT_WRITE); > > + igt_assert(ptr == NULL); > > + > > + ptr = gem_mmap__gtt(i915, handle, sz, PROT_READ); > > + gem_close(i915, handle); > > + > > + /* Check that a write into the GTT readonly map fails */ > > + if (!(sig = sigsetjmp(sigjmp, 1))) { > > + signal(SIGBUS, sigjmp_handler); > > + signal(SIGSEGV, sigjmp_handler); > > + memset(ptr, 0x5a, sz); > > + igt_assert(0); > > + } > > + igt_assert_eq(sig, SIGSEGV); > > + > > + /* Check that we disallow removing the readonly protection */ > > + igt_assert(mprotect(ptr, sz, PROT_WRITE)); > > + if (!(sig = sigsetjmp(sigjmp, 1))) { > > Continuing from previous reply - there is no longjmp so I don't know who > will jump here. Maybe it is just me since I am not familiar with the > facility but I still have a feeling comment on high level setup here is > warranted. Look at sigjmp_handler. -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 18+ messages in thread
* [igt-dev] ✓ Fi.CI.BAT: success for igt/gem_exec_gttfill: Avoid pwrite into busy handle (rev2) 2018-06-28 21:35 ` Chris Wilson ` (3 preceding siblings ...) (?) @ 2018-06-29 8:12 ` Patchwork -1 siblings, 0 replies; 18+ messages in thread From: Patchwork @ 2018-06-29 8:12 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_exec_gttfill: Avoid pwrite into busy handle (rev2) URL : https://patchwork.freedesktop.org/series/45620/ State : success == Summary == = CI Bug Log - changes from CI_DRM_4401 -> IGTPW_1512 = == Summary - SUCCESS == No regressions found. External URL: https://patchwork.freedesktop.org/api/1.0/series/45620/revisions/2/mbox/ == Changes == No changes found == Participating hosts (43 -> 38) == Missing (5): fi-byt-j1900 fi-ilk-m540 fi-byt-squawks fi-bsw-cyan fi-hsw-4200u == Build changes == * IGT: IGT_4530 -> IGTPW_1512 CI_DRM_4401: 4fe59a304a9a855a1c0e9a576c94d4cca239b427 @ git://anongit.freedesktop.org/gfx-ci/linux IGTPW_1512: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1512/ IGT_4530: 0e98bf69f146eb72fe3a7c3b19a049b5786f0ca3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Testlist changes == +igt@gem_userptr_blits@readonly-mmap-unsync +igt@gem_userptr_blits@readonly-pwrite-unsync +igt@gem_userptr_blits@readonly-unsync == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1512/issues.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* [igt-dev] ✓ Fi.CI.IGT: success for igt/gem_exec_gttfill: Avoid pwrite into busy handle (rev2) 2018-06-28 21:35 ` Chris Wilson ` (4 preceding siblings ...) (?) @ 2018-06-29 9:37 ` Patchwork -1 siblings, 0 replies; 18+ messages in thread From: Patchwork @ 2018-06-29 9:37 UTC (permalink / raw) To: Chris Wilson; +Cc: igt-dev == Series Details == Series: igt/gem_exec_gttfill: Avoid pwrite into busy handle (rev2) URL : https://patchwork.freedesktop.org/series/45620/ State : success == Summary == = CI Bug Log - changes from IGT_4530_full -> IGTPW_1512_full = == Summary - WARNING == Minor unknown changes coming with IGTPW_1512_full need to be verified manually. If you think the reported changes have nothing to do with the changes introduced in IGTPW_1512_full, please notify your bug team to allow them to document this new failure mode, which will reduce false positives in CI. External URL: https://patchwork.freedesktop.org/api/1.0/series/45620/revisions/2/mbox/ == Possible new issues == Here are the unknown changes that may have been introduced in IGTPW_1512_full: === IGT changes === ==== Warnings ==== igt@gem_exec_schedule@deep-bsd1: shard-kbl: PASS -> SKIP igt@gem_linear_blits@interruptible: shard-glk: SKIP -> PASS shard-apl: SKIP -> PASS igt@perf_pmu@rc6: shard-kbl: SKIP -> PASS == Known issues == Here are the changes found in IGTPW_1512_full that come from known issues: === IGT changes === ==== Issues hit ==== igt@drv_selftest@live_gtt: shard-kbl: PASS -> FAIL (fdo#105347) igt@drv_selftest@live_hangcheck: shard-kbl: PASS -> DMESG-FAIL (fdo#106560, fdo#106947) shard-apl: PASS -> DMESG-FAIL (fdo#106560, fdo#106947) igt@drv_suspend@shrink: shard-kbl: PASS -> INCOMPLETE (fdo#106886, fdo#103665) igt@kms_flip@2x-plain-flip-ts-check-interruptible: shard-glk: PASS -> FAIL (fdo#100368) +1 igt@kms_flip@flip-vs-expired-vblank: shard-glk: PASS -> FAIL (fdo#105189) igt@kms_flip_tiling@flip-to-y-tiled: shard-glk: PASS -> FAIL (fdo#103822, fdo#104724) 
igt@kms_rotation_crc@sprite-rotation-180: shard-snb: PASS -> FAIL (fdo#103925, fdo#104724) ==== Possible fixes ==== igt@gem_ctx_isolation@rcs0-s3: shard-kbl: INCOMPLETE (fdo#103665) -> PASS igt@kms_cursor_legacy@cursora-vs-flipa-toggle: shard-glk: DMESG-WARN (fdo#105763) -> PASS igt@kms_flip@2x-plain-flip-fb-recreate: shard-glk: FAIL (fdo#100368) -> PASS igt@kms_flip@flip-vs-expired-vblank: shard-hsw: FAIL (fdo#105363, fdo#102887) -> PASS igt@perf_pmu@busy-accuracy-98-vcs1: shard-snb: INCOMPLETE (fdo#105411) -> SKIP ==== Warnings ==== igt@drv_selftest@live_gtt: shard-glk: FAIL (fdo#105347) -> INCOMPLETE (k.org#198133, fdo#103359) fdo#100368 https://bugs.freedesktop.org/show_bug.cgi?id=100368 fdo#102887 https://bugs.freedesktop.org/show_bug.cgi?id=102887 fdo#103359 https://bugs.freedesktop.org/show_bug.cgi?id=103359 fdo#103665 https://bugs.freedesktop.org/show_bug.cgi?id=103665 fdo#103822 https://bugs.freedesktop.org/show_bug.cgi?id=103822 fdo#103925 https://bugs.freedesktop.org/show_bug.cgi?id=103925 fdo#104724 https://bugs.freedesktop.org/show_bug.cgi?id=104724 fdo#105189 https://bugs.freedesktop.org/show_bug.cgi?id=105189 fdo#105347 https://bugs.freedesktop.org/show_bug.cgi?id=105347 fdo#105363 https://bugs.freedesktop.org/show_bug.cgi?id=105363 fdo#105411 https://bugs.freedesktop.org/show_bug.cgi?id=105411 fdo#105763 https://bugs.freedesktop.org/show_bug.cgi?id=105763 fdo#106560 https://bugs.freedesktop.org/show_bug.cgi?id=106560 fdo#106886 https://bugs.freedesktop.org/show_bug.cgi?id=106886 fdo#106947 https://bugs.freedesktop.org/show_bug.cgi?id=106947 k.org#198133 https://bugzilla.kernel.org/show_bug.cgi?id=198133 == Participating hosts (5 -> 5) == No changes in participating hosts == Build changes == * IGT: IGT_4530 -> IGTPW_1512 * Linux: CI_DRM_4373 -> CI_DRM_4401 CI_DRM_4373: be7193758db79443ad5dc45072a166746819ba7e @ git://anongit.freedesktop.org/gfx-ci/linux CI_DRM_4401: 4fe59a304a9a855a1c0e9a576c94d4cca239b427 @ git://anongit.freedesktop.org/gfx-ci/linux 
IGTPW_1512: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1512/ IGT_4530: 0e98bf69f146eb72fe3a7c3b19a049b5786f0ca3 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools == Logs == For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_1512/shards.html _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle 2018-06-28 21:35 ` Chris Wilson @ 2018-06-29 15:15 ` Tvrtko Ursulin -1 siblings, 0 replies; 18+ messages in thread From: Tvrtko Ursulin @ 2018-06-29 15:15 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev On 28/06/2018 22:35, Chris Wilson wrote: > The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT > pressure (by trying to execute more objects than may fit into the GTT). > We spread the same set of handles across different processes, with the > result that each would occasionally stall waiting for execution of an > unrelated batch, limiting the pressure we were applying. If we using a > steaming write via a WC pointer, we can avoid the serialisation penalty > and so submit faster. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- > 1 file changed, 42 insertions(+), 24 deletions(-) > > diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c > index 4097e4077..efd612bb6 100644 > --- a/tests/gem_exec_gttfill.c > +++ b/tests/gem_exec_gttfill.c > @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); > > #define BATCH_SIZE (4096<<10) > > -static void xchg_u32(void *array, unsigned i, unsigned j) > +struct batch { > + uint32_t handle; > + void *ptr; > +}; > + > +static void xchg_batch(void *array, unsigned int i, unsigned int j) > { > - uint32_t *u32 = array; > - uint32_t tmp = u32[i]; > - u32[i] = u32[j]; > - u32[j] = tmp; > + struct batch *batches = array; > + struct batch tmp; > + > + tmp = batches[i]; > + batches[i] = batches[j]; > + batches[j] = tmp; > } > > static void submit(int fd, int gen, > struct drm_i915_gem_execbuffer2 *eb, > struct drm_i915_gem_relocation_entry *reloc, > - uint32_t *handles, unsigned count) > + struct batch *batches, unsigned int count) > { > struct drm_i915_gem_exec_object2 obj; > uint32_t batch[16]; > @@ 
-80,7 +87,7 @@ static void submit(int fd, int gen, > > eb->buffers_ptr = to_user_pointer(&obj); > for (unsigned i = 0; i < count; i++) { > - obj.handle = handles[i]; > + obj.handle = batches[i].handle; > reloc[0].target_handle = obj.handle; > reloc[1].target_handle = obj.handle; > > @@ -88,8 +95,8 @@ static void submit(int fd, int gen, > reloc[0].presumed_offset = obj.offset; > reloc[1].presumed_offset = obj.offset; > > - gem_write(fd, obj.handle, eb->batch_start_offset, > - batch, sizeof(batch)); > + memcpy(batches[i].ptr + eb->batch_start_offset, > + batch, sizeof(batch)); > > gem_execbuf(fd, eb); > } > @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) > struct drm_i915_gem_execbuffer2 execbuf; > struct drm_i915_gem_relocation_entry reloc[2]; > volatile uint64_t *shared; > - unsigned *handles; > + struct batch *batches; > unsigned engines[16]; > unsigned nengine; > unsigned engine; > @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) > if (gen < 6) > execbuf.flags |= I915_EXEC_SECURE; > > - handles = calloc(count, sizeof(handles)); > - igt_assert(handles); > - for (unsigned i = 0; i < count; i++) > - handles[i] = gem_create(fd, BATCH_SIZE); > + batches = calloc(count, sizeof(*batches)); > + igt_assert(batches); > + for (unsigned i = 0; i < count; i++) { > + batches[i].handle = gem_create(fd, BATCH_SIZE); > + batches[i].ptr = > + __gem_mmap__wc(fd, batches[i].handle, > + 0, BATCH_SIZE, PROT_WRITE); > + if (!batches[i].ptr) { > + batches[i].ptr = > + __gem_mmap__gtt(fd, batches[i].handle, > + BATCH_SIZE, PROT_WRITE); > + } > + igt_require(batches[i].ptr); Not assert? 
> + } > > /* Flush all memory before we start the timer */ > - submit(fd, gen, &execbuf, reloc, handles, count); > + submit(fd, gen, &execbuf, reloc, batches, count); > > igt_fork(child, nengine) { > uint64_t cycles = 0; > hars_petruska_f54_1_random_perturb(child); > - igt_permute_array(handles, count, xchg_u32); > + igt_permute_array(batches, count, xchg_batch); > execbuf.batch_start_offset = child*64; > execbuf.flags |= engines[child]; > igt_until_timeout(timeout) { > - submit(fd, gen, &execbuf, reloc, handles, count); > + submit(fd, gen, &execbuf, reloc, batches, count); > for (unsigned i = 0; i < count; i++) { > - uint32_t handle = handles[i]; > - uint64_t buf[2]; > + uint64_t offset, delta; > > - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0])); > - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); > - igt_assert_eq_u64(buf[0], buf[1]); No flushing or domain management needed, especially since it can be either wc or gtt mmap? > + offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset); > + delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta); > + igt_assert_eq_u64(offset, delta); > } > cycles++; > } > @@ -176,8 +192,10 @@ static void fillgtt(int fd, unsigned ring, int timeout) > } > igt_waitchildren(); > > - for (unsigned i = 0; i < count; i++) > - gem_close(fd, handles[i]); > + for (unsigned i = 0; i < count; i++) { > + munmap(batches[i].ptr, BATCH_SIZE); > + gem_close(fd, batches[i].handle); > + } > > shared[nengine] = 0; > for (unsigned i = 0; i < nengine; i++) > Regards, Tvrtko _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle @ 2018-06-29 15:15 ` Tvrtko Ursulin 0 siblings, 0 replies; 18+ messages in thread From: Tvrtko Ursulin @ 2018-06-29 15:15 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev On 28/06/2018 22:35, Chris Wilson wrote: > The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT > pressure (by trying to execute more objects than may fit into the GTT). > We spread the same set of handles across different processes, with the > result that each would occasionally stall waiting for execution of an > unrelated batch, limiting the pressure we were applying. If we using a > steaming write via a WC pointer, we can avoid the serialisation penalty > and so submit faster. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- > 1 file changed, 42 insertions(+), 24 deletions(-) > > diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c > index 4097e4077..efd612bb6 100644 > --- a/tests/gem_exec_gttfill.c > +++ b/tests/gem_exec_gttfill.c > @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); > > #define BATCH_SIZE (4096<<10) > > -static void xchg_u32(void *array, unsigned i, unsigned j) > +struct batch { > + uint32_t handle; > + void *ptr; > +}; > + > +static void xchg_batch(void *array, unsigned int i, unsigned int j) > { > - uint32_t *u32 = array; > - uint32_t tmp = u32[i]; > - u32[i] = u32[j]; > - u32[j] = tmp; > + struct batch *batches = array; > + struct batch tmp; > + > + tmp = batches[i]; > + batches[i] = batches[j]; > + batches[j] = tmp; > } > > static void submit(int fd, int gen, > struct drm_i915_gem_execbuffer2 *eb, > struct drm_i915_gem_relocation_entry *reloc, > - uint32_t *handles, unsigned count) > + struct batch *batches, unsigned int count) > { > struct drm_i915_gem_exec_object2 obj; > uint32_t batch[16]; > @@ -80,7 +87,7 @@ static void submit(int fd, int gen, > > 
eb->buffers_ptr = to_user_pointer(&obj); > for (unsigned i = 0; i < count; i++) { > - obj.handle = handles[i]; > + obj.handle = batches[i].handle; > reloc[0].target_handle = obj.handle; > reloc[1].target_handle = obj.handle; > > @@ -88,8 +95,8 @@ static void submit(int fd, int gen, > reloc[0].presumed_offset = obj.offset; > reloc[1].presumed_offset = obj.offset; > > - gem_write(fd, obj.handle, eb->batch_start_offset, > - batch, sizeof(batch)); > + memcpy(batches[i].ptr + eb->batch_start_offset, > + batch, sizeof(batch)); > > gem_execbuf(fd, eb); > } > @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) > struct drm_i915_gem_execbuffer2 execbuf; > struct drm_i915_gem_relocation_entry reloc[2]; > volatile uint64_t *shared; > - unsigned *handles; > + struct batch *batches; > unsigned engines[16]; > unsigned nengine; > unsigned engine; > @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) > if (gen < 6) > execbuf.flags |= I915_EXEC_SECURE; > > - handles = calloc(count, sizeof(handles)); > - igt_assert(handles); > - for (unsigned i = 0; i < count; i++) > - handles[i] = gem_create(fd, BATCH_SIZE); > + batches = calloc(count, sizeof(*batches)); > + igt_assert(batches); > + for (unsigned i = 0; i < count; i++) { > + batches[i].handle = gem_create(fd, BATCH_SIZE); > + batches[i].ptr = > + __gem_mmap__wc(fd, batches[i].handle, > + 0, BATCH_SIZE, PROT_WRITE); > + if (!batches[i].ptr) { > + batches[i].ptr = > + __gem_mmap__gtt(fd, batches[i].handle, > + BATCH_SIZE, PROT_WRITE); > + } > + igt_require(batches[i].ptr); Not assert? 
> + } > > /* Flush all memory before we start the timer */ > - submit(fd, gen, &execbuf, reloc, handles, count); > + submit(fd, gen, &execbuf, reloc, batches, count); > > igt_fork(child, nengine) { > uint64_t cycles = 0; > hars_petruska_f54_1_random_perturb(child); > - igt_permute_array(handles, count, xchg_u32); > + igt_permute_array(batches, count, xchg_batch); > execbuf.batch_start_offset = child*64; > execbuf.flags |= engines[child]; > igt_until_timeout(timeout) { > - submit(fd, gen, &execbuf, reloc, handles, count); > + submit(fd, gen, &execbuf, reloc, batches, count); > for (unsigned i = 0; i < count; i++) { > - uint32_t handle = handles[i]; > - uint64_t buf[2]; > + uint64_t offset, delta; > > - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0])); > - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); > - igt_assert_eq_u64(buf[0], buf[1]); No flushing or domain management needed, especially since it can be either wc or gtt mmap? > + offset = *(uint64_t *)(batches[i].ptr + reloc[1].offset); > + delta = *(uint64_t *)(batches[i].ptr + reloc[0].delta); > + igt_assert_eq_u64(offset, delta); > } > cycles++; > } > @@ -176,8 +192,10 @@ static void fillgtt(int fd, unsigned ring, int timeout) > } > igt_waitchildren(); > > - for (unsigned i = 0; i < count; i++) > - gem_close(fd, handles[i]); > + for (unsigned i = 0; i < count; i++) { > + munmap(batches[i].ptr, BATCH_SIZE); > + gem_close(fd, batches[i].handle); > + } > > shared[nengine] = 0; > for (unsigned i = 0; i < nengine; i++) > Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle 2018-06-29 15:15 ` Tvrtko Ursulin @ 2018-06-29 15:22 ` Chris Wilson -1 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-29 15:22 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev Quoting Tvrtko Ursulin (2018-06-29 16:15:04) > > On 28/06/2018 22:35, Chris Wilson wrote: > > The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT > > pressure (by trying to execute more objects than may fit into the GTT). > > We spread the same set of handles across different processes, with the > > result that each would occasionally stall waiting for execution of an > > unrelated batch, limiting the pressure we were applying. If we using a > > steaming write via a WC pointer, we can avoid the serialisation penalty > > and so submit faster. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- > > 1 file changed, 42 insertions(+), 24 deletions(-) > > > > diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c > > index 4097e4077..efd612bb6 100644 > > --- a/tests/gem_exec_gttfill.c > > +++ b/tests/gem_exec_gttfill.c > > @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); > > > > #define BATCH_SIZE (4096<<10) > > > > -static void xchg_u32(void *array, unsigned i, unsigned j) > > +struct batch { > > + uint32_t handle; > > + void *ptr; > > +}; > > + > > +static void xchg_batch(void *array, unsigned int i, unsigned int j) > > { > > - uint32_t *u32 = array; > > - uint32_t tmp = u32[i]; > > - u32[i] = u32[j]; > > - u32[j] = tmp; > > + struct batch *batches = array; > > + struct batch tmp; > > + > > + tmp = batches[i]; > > + batches[i] = batches[j]; > > + batches[j] = tmp; > > } > > > > static void submit(int fd, int gen, > > struct drm_i915_gem_execbuffer2 *eb, > > struct drm_i915_gem_relocation_entry *reloc, > > - uint32_t 
*handles, unsigned count) > > + struct batch *batches, unsigned int count) > > { > > struct drm_i915_gem_exec_object2 obj; > > uint32_t batch[16]; > > @@ -80,7 +87,7 @@ static void submit(int fd, int gen, > > > > eb->buffers_ptr = to_user_pointer(&obj); > > for (unsigned i = 0; i < count; i++) { > > - obj.handle = handles[i]; > > + obj.handle = batches[i].handle; > > reloc[0].target_handle = obj.handle; > > reloc[1].target_handle = obj.handle; > > > > @@ -88,8 +95,8 @@ static void submit(int fd, int gen, > > reloc[0].presumed_offset = obj.offset; > > reloc[1].presumed_offset = obj.offset; > > > > - gem_write(fd, obj.handle, eb->batch_start_offset, > > - batch, sizeof(batch)); > > + memcpy(batches[i].ptr + eb->batch_start_offset, > > + batch, sizeof(batch)); > > > > gem_execbuf(fd, eb); > > } > > @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) > > struct drm_i915_gem_execbuffer2 execbuf; > > struct drm_i915_gem_relocation_entry reloc[2]; > > volatile uint64_t *shared; > > - unsigned *handles; > > + struct batch *batches; > > unsigned engines[16]; > > unsigned nengine; > > unsigned engine; > > @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) > > if (gen < 6) > > execbuf.flags |= I915_EXEC_SECURE; > > > > - handles = calloc(count, sizeof(handles)); > > - igt_assert(handles); > > - for (unsigned i = 0; i < count; i++) > > - handles[i] = gem_create(fd, BATCH_SIZE); > > + batches = calloc(count, sizeof(*batches)); > > + igt_assert(batches); > > + for (unsigned i = 0; i < count; i++) { > > + batches[i].handle = gem_create(fd, BATCH_SIZE); > > + batches[i].ptr = > > + __gem_mmap__wc(fd, batches[i].handle, > > + 0, BATCH_SIZE, PROT_WRITE); > > + if (!batches[i].ptr) { > > + batches[i].ptr = > > + __gem_mmap__gtt(fd, batches[i].handle, > > + BATCH_SIZE, PROT_WRITE); > > + } > > + igt_require(batches[i].ptr); > > Not assert? 
If we fall back to using gtt, we are likely to run out of mappable space, in which case we can't run the test. We should only fall back to gtt because we can't support WC (the likelihood of it being ENOMEM is small). So skip since a failure is expected on old kernels. > > + } > > > > /* Flush all memory before we start the timer */ > > - submit(fd, gen, &execbuf, reloc, handles, count); > > + submit(fd, gen, &execbuf, reloc, batches, count); > > > > igt_fork(child, nengine) { > > uint64_t cycles = 0; > > hars_petruska_f54_1_random_perturb(child); > > - igt_permute_array(handles, count, xchg_u32); > > + igt_permute_array(batches, count, xchg_batch); > > execbuf.batch_start_offset = child*64; > > execbuf.flags |= engines[child]; > > igt_until_timeout(timeout) { > > - submit(fd, gen, &execbuf, reloc, handles, count); > > + submit(fd, gen, &execbuf, reloc, batches, count); > > for (unsigned i = 0; i < count; i++) { > > - uint32_t handle = handles[i]; > > - uint64_t buf[2]; > > + uint64_t offset, delta; > > > > - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0])); > > - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); > > - igt_assert_eq_u64(buf[0], buf[1]); > > No flushing or domain management needed, especially since it can be > either wc or gtt mmap? It's a UC read of a buffer known to be already flushed from the CPU caches with a prior gem_sync, so no, not required. Considering that asynchronous access is the whole point of the patch... -Chris _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle @ 2018-06-29 15:22 ` Chris Wilson 0 siblings, 0 replies; 18+ messages in thread From: Chris Wilson @ 2018-06-29 15:22 UTC (permalink / raw) To: Tvrtko Ursulin, intel-gfx; +Cc: igt-dev Quoting Tvrtko Ursulin (2018-06-29 16:15:04) > > On 28/06/2018 22:35, Chris Wilson wrote: > > The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT > > pressure (by trying to execute more objects than may fit into the GTT). > > We spread the same set of handles across different processes, with the > > result that each would occasionally stall waiting for execution of an > > unrelated batch, limiting the pressure we were applying. If we using a > > steaming write via a WC pointer, we can avoid the serialisation penalty > > and so submit faster. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- > > 1 file changed, 42 insertions(+), 24 deletions(-) > > > > diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c > > index 4097e4077..efd612bb6 100644 > > --- a/tests/gem_exec_gttfill.c > > +++ b/tests/gem_exec_gttfill.c > > @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); > > > > #define BATCH_SIZE (4096<<10) > > > > -static void xchg_u32(void *array, unsigned i, unsigned j) > > +struct batch { > > + uint32_t handle; > > + void *ptr; > > +}; > > + > > +static void xchg_batch(void *array, unsigned int i, unsigned int j) > > { > > - uint32_t *u32 = array; > > - uint32_t tmp = u32[i]; > > - u32[i] = u32[j]; > > - u32[j] = tmp; > > + struct batch *batches = array; > > + struct batch tmp; > > + > > + tmp = batches[i]; > > + batches[i] = batches[j]; > > + batches[j] = tmp; > > } > > > > static void submit(int fd, int gen, > > struct drm_i915_gem_execbuffer2 *eb, > > struct drm_i915_gem_relocation_entry *reloc, > > - uint32_t *handles, unsigned count) > > + struct batch *batches, 
unsigned int count) > > { > > struct drm_i915_gem_exec_object2 obj; > > uint32_t batch[16]; > > @@ -80,7 +87,7 @@ static void submit(int fd, int gen, > > > > eb->buffers_ptr = to_user_pointer(&obj); > > for (unsigned i = 0; i < count; i++) { > > - obj.handle = handles[i]; > > + obj.handle = batches[i].handle; > > reloc[0].target_handle = obj.handle; > > reloc[1].target_handle = obj.handle; > > > > @@ -88,8 +95,8 @@ static void submit(int fd, int gen, > > reloc[0].presumed_offset = obj.offset; > > reloc[1].presumed_offset = obj.offset; > > > > - gem_write(fd, obj.handle, eb->batch_start_offset, > > - batch, sizeof(batch)); > > + memcpy(batches[i].ptr + eb->batch_start_offset, > > + batch, sizeof(batch)); > > > > gem_execbuf(fd, eb); > > } > > @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) > > struct drm_i915_gem_execbuffer2 execbuf; > > struct drm_i915_gem_relocation_entry reloc[2]; > > volatile uint64_t *shared; > > - unsigned *handles; > > + struct batch *batches; > > unsigned engines[16]; > > unsigned nengine; > > unsigned engine; > > @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) > > if (gen < 6) > > execbuf.flags |= I915_EXEC_SECURE; > > > > - handles = calloc(count, sizeof(handles)); > > - igt_assert(handles); > > - for (unsigned i = 0; i < count; i++) > > - handles[i] = gem_create(fd, BATCH_SIZE); > > + batches = calloc(count, sizeof(*batches)); > > + igt_assert(batches); > > + for (unsigned i = 0; i < count; i++) { > > + batches[i].handle = gem_create(fd, BATCH_SIZE); > > + batches[i].ptr = > > + __gem_mmap__wc(fd, batches[i].handle, > > + 0, BATCH_SIZE, PROT_WRITE); > > + if (!batches[i].ptr) { > > + batches[i].ptr = > > + __gem_mmap__gtt(fd, batches[i].handle, > > + BATCH_SIZE, PROT_WRITE); > > + } > > + igt_require(batches[i].ptr); > > Not assert? If we fallback to using gtt, we are likely to run out of mappable space, in which case we can't run the test. 
We should only fall back to gtt because we can't support WC (the likelihood of it being ENOMEM is small). So skip since a failure is expected on old kernels. > > + } > > > > /* Flush all memory before we start the timer */ > > - submit(fd, gen, &execbuf, reloc, handles, count); > > + submit(fd, gen, &execbuf, reloc, batches, count); > > > > igt_fork(child, nengine) { > > uint64_t cycles = 0; > > hars_petruska_f54_1_random_perturb(child); > > - igt_permute_array(handles, count, xchg_u32); > > + igt_permute_array(batches, count, xchg_batch); > > execbuf.batch_start_offset = child*64; > > execbuf.flags |= engines[child]; > > igt_until_timeout(timeout) { > > - submit(fd, gen, &execbuf, reloc, handles, count); > > + submit(fd, gen, &execbuf, reloc, batches, count); > > for (unsigned i = 0; i < count; i++) { > > - uint32_t handle = handles[i]; > > - uint64_t buf[2]; > > + uint64_t offset, delta; > > > > - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0])); > > - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); > > - igt_assert_eq_u64(buf[0], buf[1]); > > No flushing or domain management needed, especially since it can be > either wc or gtt mmap? It's a UC read of a buffer known to be already flushed from the CPU caches with a prior gem_sync, so no, not required. Considering that asynchronous access is the whole point of the patch... -Chris _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [igt-dev] [Intel-gfx] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle 2018-06-29 15:22 ` Chris Wilson @ 2018-06-29 15:43 ` Tvrtko Ursulin -1 siblings, 0 replies; 18+ messages in thread From: Tvrtko Ursulin @ 2018-06-29 15:43 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev On 29/06/2018 16:22, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-06-29 16:15:04) >> >> On 28/06/2018 22:35, Chris Wilson wrote: >>> The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT >>> pressure (by trying to execute more objects than may fit into the GTT). >>> We spread the same set of handles across different processes, with the >>> result that each would occasionally stall waiting for execution of an >>> unrelated batch, limiting the pressure we were applying. If we using a >>> steaming write via a WC pointer, we can avoid the serialisation penalty >>> and so submit faster. >>> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> --- >>> tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- >>> 1 file changed, 42 insertions(+), 24 deletions(-) >>> >>> diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c >>> index 4097e4077..efd612bb6 100644 >>> --- a/tests/gem_exec_gttfill.c >>> +++ b/tests/gem_exec_gttfill.c >>> @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); >>> >>> #define BATCH_SIZE (4096<<10) >>> >>> -static void xchg_u32(void *array, unsigned i, unsigned j) >>> +struct batch { >>> + uint32_t handle; >>> + void *ptr; >>> +}; >>> + >>> +static void xchg_batch(void *array, unsigned int i, unsigned int j) >>> { >>> - uint32_t *u32 = array; >>> - uint32_t tmp = u32[i]; >>> - u32[i] = u32[j]; >>> - u32[j] = tmp; >>> + struct batch *batches = array; >>> + struct batch tmp; >>> + >>> + tmp = batches[i]; >>> + batches[i] = batches[j]; >>> + batches[j] = tmp; >>> } >>> >>> static void submit(int fd, int gen, >>> struct drm_i915_gem_execbuffer2 *eb, >>> struct 
drm_i915_gem_relocation_entry *reloc, >>> - uint32_t *handles, unsigned count) >>> + struct batch *batches, unsigned int count) >>> { >>> struct drm_i915_gem_exec_object2 obj; >>> uint32_t batch[16]; >>> @@ -80,7 +87,7 @@ static void submit(int fd, int gen, >>> >>> eb->buffers_ptr = to_user_pointer(&obj); >>> for (unsigned i = 0; i < count; i++) { >>> - obj.handle = handles[i]; >>> + obj.handle = batches[i].handle; >>> reloc[0].target_handle = obj.handle; >>> reloc[1].target_handle = obj.handle; >>> >>> @@ -88,8 +95,8 @@ static void submit(int fd, int gen, >>> reloc[0].presumed_offset = obj.offset; >>> reloc[1].presumed_offset = obj.offset; >>> >>> - gem_write(fd, obj.handle, eb->batch_start_offset, >>> - batch, sizeof(batch)); >>> + memcpy(batches[i].ptr + eb->batch_start_offset, >>> + batch, sizeof(batch)); >>> >>> gem_execbuf(fd, eb); >>> } >>> @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) >>> struct drm_i915_gem_execbuffer2 execbuf; >>> struct drm_i915_gem_relocation_entry reloc[2]; >>> volatile uint64_t *shared; >>> - unsigned *handles; >>> + struct batch *batches; >>> unsigned engines[16]; >>> unsigned nengine; >>> unsigned engine; >>> @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) >>> if (gen < 6) >>> execbuf.flags |= I915_EXEC_SECURE; >>> >>> - handles = calloc(count, sizeof(handles)); >>> - igt_assert(handles); >>> - for (unsigned i = 0; i < count; i++) >>> - handles[i] = gem_create(fd, BATCH_SIZE); >>> + batches = calloc(count, sizeof(*batches)); >>> + igt_assert(batches); >>> + for (unsigned i = 0; i < count; i++) { >>> + batches[i].handle = gem_create(fd, BATCH_SIZE); >>> + batches[i].ptr = >>> + __gem_mmap__wc(fd, batches[i].handle, >>> + 0, BATCH_SIZE, PROT_WRITE); >>> + if (!batches[i].ptr) { >>> + batches[i].ptr = >>> + __gem_mmap__gtt(fd, batches[i].handle, >>> + BATCH_SIZE, PROT_WRITE); >>> + } >>> + igt_require(batches[i].ptr); >> >> Not assert? 
> > If we fallback to using gtt, we are likely to run out of mappable space, > in which case we can't run the test. We should only fallback to gtt > because we can't support WC (the likelihood of it being ENOMEM is > small). So skip since a failure is expected on old kernels. > >>> + } >>> >>> /* Flush all memory before we start the timer */ >>> - submit(fd, gen, &execbuf, reloc, handles, count); >>> + submit(fd, gen, &execbuf, reloc, batches, count); >>> >>> igt_fork(child, nengine) { >>> uint64_t cycles = 0; >>> hars_petruska_f54_1_random_perturb(child); >>> - igt_permute_array(handles, count, xchg_u32); >>> + igt_permute_array(batches, count, xchg_batch); >>> execbuf.batch_start_offset = child*64; >>> execbuf.flags |= engines[child]; >>> igt_until_timeout(timeout) { >>> - submit(fd, gen, &execbuf, reloc, handles, count); >>> + submit(fd, gen, &execbuf, reloc, batches, count); >>> for (unsigned i = 0; i < count; i++) { >>> - uint32_t handle = handles[i]; >>> - uint64_t buf[2]; >>> + uint64_t offset, delta; >>> >>> - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0])); >>> - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); >>> - igt_assert_eq_u64(buf[0], buf[1]); >> >> No flushing or domain management needed, especially since it can be >> either wc or gtt mmap? > > It's a UC read of a buffer known to already flushed from the CPU caches > with a prior gem_sync, so no not required. Considering that asynchronous > access is the whole point of the patch... True. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko _______________________________________________ igt-dev mailing list igt-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/igt-dev ^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle @ 2018-06-29 15:43 ` Tvrtko Ursulin 0 siblings, 0 replies; 18+ messages in thread From: Tvrtko Ursulin @ 2018-06-29 15:43 UTC (permalink / raw) To: Chris Wilson, intel-gfx; +Cc: igt-dev On 29/06/2018 16:22, Chris Wilson wrote: > Quoting Tvrtko Ursulin (2018-06-29 16:15:04) >> >> On 28/06/2018 22:35, Chris Wilson wrote: >>> The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT >>> pressure (by trying to execute more objects than may fit into the GTT). >>> We spread the same set of handles across different processes, with the >>> result that each would occasionally stall waiting for execution of an >>> unrelated batch, limiting the pressure we were applying. If we using a >>> steaming write via a WC pointer, we can avoid the serialisation penalty >>> and so submit faster. >>> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >>> --- >>> tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++--------------- >>> 1 file changed, 42 insertions(+), 24 deletions(-) >>> >>> diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c >>> index 4097e4077..efd612bb6 100644 >>> --- a/tests/gem_exec_gttfill.c >>> +++ b/tests/gem_exec_gttfill.c >>> @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches."); >>> >>> #define BATCH_SIZE (4096<<10) >>> >>> -static void xchg_u32(void *array, unsigned i, unsigned j) >>> +struct batch { >>> + uint32_t handle; >>> + void *ptr; >>> +}; >>> + >>> +static void xchg_batch(void *array, unsigned int i, unsigned int j) >>> { >>> - uint32_t *u32 = array; >>> - uint32_t tmp = u32[i]; >>> - u32[i] = u32[j]; >>> - u32[j] = tmp; >>> + struct batch *batches = array; >>> + struct batch tmp; >>> + >>> + tmp = batches[i]; >>> + batches[i] = batches[j]; >>> + batches[j] = tmp; >>> } >>> >>> static void submit(int fd, int gen, >>> struct drm_i915_gem_execbuffer2 *eb, >>> struct drm_i915_gem_relocation_entry *reloc, >>> - uint32_t *handles, 
unsigned count) >>> + struct batch *batches, unsigned int count) >>> { >>> struct drm_i915_gem_exec_object2 obj; >>> uint32_t batch[16]; >>> @@ -80,7 +87,7 @@ static void submit(int fd, int gen, >>> >>> eb->buffers_ptr = to_user_pointer(&obj); >>> for (unsigned i = 0; i < count; i++) { >>> - obj.handle = handles[i]; >>> + obj.handle = batches[i].handle; >>> reloc[0].target_handle = obj.handle; >>> reloc[1].target_handle = obj.handle; >>> >>> @@ -88,8 +95,8 @@ static void submit(int fd, int gen, >>> reloc[0].presumed_offset = obj.offset; >>> reloc[1].presumed_offset = obj.offset; >>> >>> - gem_write(fd, obj.handle, eb->batch_start_offset, >>> - batch, sizeof(batch)); >>> + memcpy(batches[i].ptr + eb->batch_start_offset, >>> + batch, sizeof(batch)); >>> >>> gem_execbuf(fd, eb); >>> } >>> @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout) >>> struct drm_i915_gem_execbuffer2 execbuf; >>> struct drm_i915_gem_relocation_entry reloc[2]; >>> volatile uint64_t *shared; >>> - unsigned *handles; >>> + struct batch *batches; >>> unsigned engines[16]; >>> unsigned nengine; >>> unsigned engine; >>> @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout) >>> if (gen < 6) >>> execbuf.flags |= I915_EXEC_SECURE; >>> >>> - handles = calloc(count, sizeof(handles)); >>> - igt_assert(handles); >>> - for (unsigned i = 0; i < count; i++) >>> - handles[i] = gem_create(fd, BATCH_SIZE); >>> + batches = calloc(count, sizeof(*batches)); >>> + igt_assert(batches); >>> + for (unsigned i = 0; i < count; i++) { >>> + batches[i].handle = gem_create(fd, BATCH_SIZE); >>> + batches[i].ptr = >>> + __gem_mmap__wc(fd, batches[i].handle, >>> + 0, BATCH_SIZE, PROT_WRITE); >>> + if (!batches[i].ptr) { >>> + batches[i].ptr = >>> + __gem_mmap__gtt(fd, batches[i].handle, >>> + BATCH_SIZE, PROT_WRITE); >>> + } >>> + igt_require(batches[i].ptr); >> >> Not assert? 
> > If we fall back to using gtt, we are likely to run out of mappable space, > in which case we can't run the test. We should only fall back to gtt > because we can't support WC (the likelihood of it being ENOMEM is > small). So skip since a failure is expected on old kernels. > >>> + } >>> >>> /* Flush all memory before we start the timer */ >>> - submit(fd, gen, &execbuf, reloc, handles, count); >>> + submit(fd, gen, &execbuf, reloc, batches, count); >>> >>> igt_fork(child, nengine) { >>> uint64_t cycles = 0; >>> hars_petruska_f54_1_random_perturb(child); >>> - igt_permute_array(handles, count, xchg_u32); >>> + igt_permute_array(batches, count, xchg_batch); >>> execbuf.batch_start_offset = child*64; >>> execbuf.flags |= engines[child]; >>> igt_until_timeout(timeout) { >>> - submit(fd, gen, &execbuf, reloc, handles, count); >>> + submit(fd, gen, &execbuf, reloc, batches, count); >>> for (unsigned i = 0; i < count; i++) { >>> - uint32_t handle = handles[i]; >>> - uint64_t buf[2]; >>> + uint64_t offset, delta; >>> >>> - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0])); >>> - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1])); >>> - igt_assert_eq_u64(buf[0], buf[1]); >> >> No flushing or domain management needed, especially since it can be >> either wc or gtt mmap? > > It's a UC read of a buffer known to be already flushed from the CPU caches > with a prior gem_sync, so no, not required. Considering that asynchronous > access is the whole point of the patch... True. Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx ^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2018-06-29 15:43 UTC | newest] Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2018-06-28 21:35 [igt-dev] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle Chris Wilson 2018-06-28 21:35 ` Chris Wilson 2018-06-29 0:21 ` [igt-dev] ✓ Fi.CI.BAT: success for " Patchwork 2018-06-29 7:17 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork 2018-06-29 7:44 ` [igt-dev] [PATCH i-g-t v2] igt/gem_userptr: Check read-only mappings Chris Wilson 2018-06-29 7:44 ` Chris Wilson 2018-06-29 9:31 ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin 2018-06-29 9:31 ` Tvrtko Ursulin 2018-06-29 9:44 ` [igt-dev] [Intel-gfx] " Chris Wilson 2018-06-29 9:44 ` Chris Wilson 2018-06-29 8:12 ` [igt-dev] ✓ Fi.CI.BAT: success for igt/gem_exec_gttfill: Avoid pwrite into busy handle (rev2) Patchwork 2018-06-29 9:37 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork 2018-06-29 15:15 ` [igt-dev] [Intel-gfx] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle Tvrtko Ursulin 2018-06-29 15:15 ` Tvrtko Ursulin 2018-06-29 15:22 ` [igt-dev] [Intel-gfx] " Chris Wilson 2018-06-29 15:22 ` Chris Wilson 2018-06-29 15:43 ` [igt-dev] [Intel-gfx] " Tvrtko Ursulin 2018-06-29 15:43 ` Tvrtko Ursulin
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.