From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tvrtko Ursulin Subject: Re: [PATCH 3/3] tests/gem_userptr_benchmark: Benchmarking userptr surfaces and impact Date: Wed, 23 Apr 2014 14:28:54 +0100 Message-ID: <5357C016.8090600@linux.intel.com> References: <1393431465-31630-1-git-send-email-tvrtko.ursulin@linux.intel.com> <1395227586-18623-1-git-send-email-tvrtko.ursulin@linux.intel.com> <1395227586-18623-4-git-send-email-tvrtko.ursulin@linux.intel.com> <20140417231846.GB7521@bdvolkin-ubuntu-desktop> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1"; Format="flowed" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by gabe.freedesktop.org (Postfix) with ESMTP id 16AAA6E67E for ; Wed, 23 Apr 2014 06:28:57 -0700 (PDT) In-Reply-To: <20140417231846.GB7521@bdvolkin-ubuntu-desktop> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: "Volkin, Bradley D" Cc: "Intel-gfx@lists.freedesktop.org" List-Id: intel-gfx@lists.freedesktop.org Hi Brad, On 04/18/2014 12:18 AM, Volkin, Bradley D wrote: > On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin >> >> This adds a small benchmark for the new userptr functionality. >> >> Apart from basic surface creation and destruction, also tested is the >> impact of having userptr surfaces in the process address space. Reason >> for that is the impact of MMU notifiers on common address space >> operations like munmap() which is per process. >> >> v2: >> * Moved to benchmarks. >> * Added pointer read/write tests. >> * Changed output to say iterations per second instead of >> operations per second. >> * Multiply result by batch size for multi-create* tests >> for a more comparable number with create-destroy test. >> >> v3: >> * Fixed ioctl detection on kernels without MMU_NOTIFIERs. >> >> Signed-off-by: Tvrtko Ursulin >> Cc: Chris Wilson >> --- >> Android.mk | 3 +- >> benchmarks/.gitignore | 1 + >> benchmarks/Android.mk | 36 +++ >> benchmarks/Makefile.am | 7 +- >> benchmarks/Makefile.sources | 6 + >> benchmarks/gem_userptr_benchmark.c | 513 +++++++++++++++++++++++++++++= ++++++++ >> 6 files changed, 558 insertions(+), 8 deletions(-) >> create mode 100644 benchmarks/Android.mk >> create mode 100644 benchmarks/Makefile.sources >> create mode 100644 benchmarks/gem_userptr_benchmark.c >> >> diff --git a/Android.mk b/Android.mk >> index 8aeb2d4..0c969b8 100644 >> --- a/Android.mk >> +++ b/Android.mk >> @@ -1,2 +1 @@ >> -include $(call all-named-subdir-makefiles, lib tests tools) >> - >> +include $(call all-named-subdir-makefiles, lib tests tools benchmarks) >> diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore >> index ddea6f7..09e5bd8 100644 >> --- a/benchmarks/.gitignore >> +++ b/benchmarks/.gitignore >> @@ -1,3 +1,4 @@ >> +gem_userptr_benchmark >> intel_upload_blit_large >> intel_upload_blit_large_gtt >> intel_upload_blit_large_map >> diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk >> new file mode 100644 >> index 0000000..5bb8ef5 >> --- /dev/null >> +++ b/benchmarks/Android.mk >> @@ -0,0 +1,36 @@ >> +LOCAL_PATH :=3D $(call my-dir) >> + >> +include $(LOCAL_PATH)/Makefile.sources >> + >> +#=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D# >> + >> +define add_benchmark >> + include $(CLEAR_VARS) >> + >> + LOCAL_SRC_FILES :=3D $1.c >> + >> + LOCAL_CFLAGS +=3D -DHAVE_STRUCT_SYSINFO_TOTALRAM >> + LOCAL_CFLAGS +=3D -DANDROID -UNDEBUG -include "check-ndebug.h" >> + LOCAL_CFLAGS +=3D -std=3Dc99 >> + # FIXME: drop once Bionic correctly annotates "noreturn" on pthread= _exit >> + LOCAL_CFLAGS +=3D -Wno-error=3Dreturn-type >> + # Excessive complaining for established cases. Rely on the Linux ve= rsion warnings. >> + LOCAL_CFLAGS +=3D -Wno-sign-compare >> + >> + LOCAL_MODULE :=3D $1 >> + LOCAL_MODULE_TAGS :=3D optional >> + >> + LOCAL_STATIC_LIBRARIES :=3D libintel_gpu_tools >> + >> + LOCAL_SHARED_LIBRARIES :=3D libpciaccess \ >> + libdrm \ >> + libdrm_intel >> + >> + include $(BUILD_EXECUTABLE) >> +endef >> + >> +#=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D# >> + >> +benchmark_list :=3D $(bin_PROGRAMS) >> + >> +$(foreach item,$(benchmark_list),$(eval $(call add_benchmark,$(item)))) >> diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am >> index e2ad784..d173bf4 100644 >> --- a/benchmarks/Makefile.am >> +++ b/benchmarks/Makefile.am >> @@ -1,9 +1,4 @@ >> - >> -bin_PROGRAMS =3D \ >> - intel_upload_blit_large \ >> - intel_upload_blit_large_gtt \ >> - intel_upload_blit_large_map \ >> - intel_upload_blit_small >> +include Makefile.sources >> >> AM_CPPFLAGS =3D -I$(top_srcdir) -I$(top_srcdir)/lib >> AM_CFLAGS =3D $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS) >> diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources >> new file mode 100644 >> index 0000000..fd6c107 >> --- /dev/null >> +++ b/benchmarks/Makefile.sources >> @@ -0,0 +1,6 @@ >> +bin_PROGRAMS =3D \ >> + intel_upload_blit_large \ >> + intel_upload_blit_large_gtt \ >> + intel_upload_blit_large_map \ >> + intel_upload_blit_small \ >> + gem_userptr_benchmark > > You might split the makefile cleanup aspect of this into a separate > patch, but I'm fine either way. Good idea, will try to squeeze that in. >> diff --git a/benchmarks/gem_userptr_benchmark.c b/benchmarks/gem_userptr= _benchmark.c >> new file mode 100644 >> index 0000000..218f6f1 >> --- /dev/null >> +++ b/benchmarks/gem_userptr_benchmark.c >> @@ -0,0 +1,513 @@ >> +/* >> + * Copyright =A9 2014 Intel Corporation >> + * >> + * Permission is hereby granted, free of charge, to any person obtainin= g a >> + * copy of this software and associated documentation files (the "Softw= are"), >> + * to deal in the Software without restriction, including without limit= ation >> + * the rights to use, copy, modify, merge, publish, distribute, sublice= nse, >> + * and/or sell copies of the Software, and to permit persons to whom the >> + * Software is furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice (including the= next >> + * paragraph) shall be included in all copies or substantial portions o= f the >> + * Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPR= ESS OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL= ITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT S= HALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR= OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARIS= ING >> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER = DEALINGS >> + * IN THE SOFTWARE. >> + * >> + * Authors: >> + * Tvrtko Ursulin >> + * >> + */ >> + >> +/** @file gem_userptr_benchmark.c >> + * >> + * Benchmark the userptr code and impact of having userptr surfaces >> + * in process address space on some normal operations. >> + * >> + */ >> + >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include "drm.h" >> +#include "i915_drm.h" >> +#include "drmtest.h" >> +#include "intel_bufmgr.h" >> +#include "intel_batchbuffer.h" >> +#include "intel_gpu_tools.h" >> + >> +#define WIDTH 128 >> +#define HEIGHT 128 >> +#define PAGE_SIZE 4096 >> + >> +#define LOCAL_I915_GEM_USERPTR 0x34 >> +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL= _I915_GEM_USERPTR, struct local_i915_gem_userptr) >> +struct local_i915_gem_userptr { >> + uint64_t user_ptr; >> + uint64_t user_size; >> + uint32_t flags; >> +#define I915_USERPTR_READ_ONLY (1<<0) >> +#define I915_USERPTR_UNSYNCHRONIZED (1<<31) >> + uint32_t handle; >> +}; >> + >> +static uint32_t userptr_flags =3D I915_USERPTR_UNSYNCHRONIZED; >> + >> +static uint32_t linear[WIDTH*HEIGHT]; > > I may have missed it, but I don't think we use this variable except > to do sizeof(linear). If that's the case, a constant for the buffer > size might make the code more readable. You're right, too much copy & paste. >> + >> +static void gem_userptr_test_unsynchronized(void) >> +{ >> + userptr_flags =3D I915_USERPTR_UNSYNCHRONIZED; >> +} >> + >> +static void gem_userptr_test_synchronized(void) >> +{ >> + userptr_flags =3D 0; >> +} >> + >> +static int gem_userptr(int fd, void *ptr, int size, int read_only, uint= 32_t *handle) >> +{ >> + struct local_i915_gem_userptr userptr; >> + int ret; >> + >> + userptr.user_ptr =3D (uintptr_t)ptr; >> + userptr.user_size =3D size; >> + userptr.flags =3D userptr_flags; >> + if (read_only) >> + userptr.flags |=3D I915_USERPTR_READ_ONLY; >> + >> + ret =3D drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr); >> + if (ret) >> + ret =3D errno; >> + igt_skip_on_f(ret =3D=3D ENODEV && >> + (userptr_flags & I915_USERPTR_UNSYNCHRONIZED) =3D=3D 0, >> + "Skipping, synchronized mappings with no kernel CONFIG_MMU_NOTI= FIER?"); >> + if (ret =3D=3D 0) >> + *handle =3D userptr.handle; >> + >> + return ret; >> +} >> + >> +static uint32_t >> +create_userptr(int fd, uint32_t val, uint32_t *ptr) >> +{ >> + uint32_t handle; >> + int i, ret; >> + >> + ret =3D gem_userptr(fd, ptr, sizeof(linear), 0, &handle); >> + igt_assert(ret =3D=3D 0); >> + igt_assert(handle !=3D 0); >> + >> + /* Fill the BO with dwords starting at val */ >> + for (i =3D 0; i < WIDTH*HEIGHT; i++) >> + ptr[i] =3D val++; >> + >> + return handle; >> +} > > I don't see that this function is used in this test. You are right, artifact of this file starting from gem_userptr_blits.c = and then removing stuff. Obviously not enough removed. >> + >> +static void **handle_ptr_map; >> +static unsigned int num_handle_ptr_map; > > I'd prefer that we explicitly initialize at least num_handle_ptr_map. To zero, why? >> + >> +static void add_handle_ptr(uint32_t handle, void *ptr) >> +{ >> + if (handle >=3D num_handle_ptr_map) { >> + handle_ptr_map =3D realloc(handle_ptr_map, >> + (handle + 1000) * sizeof(void*)); >> + num_handle_ptr_map =3D handle + 1000; >> + } >> + >> + handle_ptr_map[handle] =3D ptr; >> +} >> + >> +static void *get_handle_ptr(uint32_t handle) >> +{ >> + return handle_ptr_map[handle]; >> +} >> + >> +static void free_handle_ptr(uint32_t handle) >> +{ >> + igt_assert(handle < num_handle_ptr_map); >> + igt_assert(handle_ptr_map[handle]); >> + >> + free(handle_ptr_map[handle]); >> + handle_ptr_map[handle] =3D NULL; >> +} >> + >> +static uint32_t create_userptr_bo(int fd, int size) >> +{ >> + void *ptr; >> + uint32_t handle; >> + int ret; >> + >> + ret =3D posix_memalign(&ptr, PAGE_SIZE, size); >> + igt_assert(ret =3D=3D 0); >> + >> + ret =3D gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle); >> + igt_assert(ret =3D=3D 0); >> + add_handle_ptr(handle, ptr); >> + >> + return handle; >> +} >> + >> +static void free_userptr_bo(int fd, uint32_t handle) >> +{ >> + gem_close(fd, handle); >> + free_handle_ptr(handle); >> +} >> + >> +static int has_userptr(int fd) >> +{ >> + uint32_t handle =3D 0; >> + void *ptr; >> + uint32_t oldflags; >> + int ret; >> + >> + assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) =3D=3D 0); >> + oldflags =3D userptr_flags; >> + gem_userptr_test_unsynchronized(); >> + ret =3D gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle); >> + userptr_flags =3D oldflags; >> + if (ret !=3D 0) { >> + free(ptr); >> + return 0; >> + } >> + >> + gem_close(fd, handle); >> + free(ptr); >> + >> + return handle !=3D 0; >> +} >> + >> +static const unsigned int nr_bos[] =3D {0, 1, 10, 100, 1000}; >> +static const unsigned int test_duration_sec =3D 3; >> + >> +static volatile unsigned int run_test; >> + >> +static void alarm_handler(int sig) >> +{ >> + assert(run_test =3D=3D 1); >> + run_test =3D 0; >> +} >> + >> +static void start_test(unsigned int duration) >> +{ >> + run_test =3D 1; >> + if (duration =3D=3D 0) >> + duration =3D test_duration_sec; >> + signal(SIGALRM, alarm_handler); >> + alarm(duration); >> +} >> + >> +static void exchange_ptr(void *array, unsigned i, unsigned j) >> +{ >> + void **arr, *tmp; >> + arr =3D (void **)array; >> + >> + tmp =3D arr[i]; >> + arr[i] =3D arr[j]; >> + arr[j] =3D tmp; >> +} >> + >> +static void test_malloc_free(int random) >> +{ >> + unsigned long iter =3D 0; >> + unsigned int i, tot =3D 1000; >> + void *ptr[tot]; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + for (i =3D 0; i < tot; i++) { >> + ptr[i] =3D malloc(1000); >> + assert(ptr[i]); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i =3D 0; i < tot; i++) >> + free(ptr[i]); >> + iter++; >> + } >> + >> + printf("%8lu iter/s\n", iter / test_duration_sec); >> +} >> + >> +static void test_malloc_realloc_free(int random) >> +{ >> + unsigned long iter =3D 0; >> + unsigned int i, tot =3D 1000; >> + void *ptr[tot]; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + for (i =3D 0; i < tot; i++) { >> + ptr[i] =3D malloc(1000); >> + assert(ptr[i]); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i =3D 0; i < tot; i++) { >> + ptr[i] =3D realloc(ptr[i], 2000); >> + assert(ptr[i]); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i =3D 0; i < tot; i++) >> + free(ptr[i]); >> + iter++; >> + } >> + >> + printf("%8lu iter/s\n", iter / test_duration_sec); >> +} >> + >> +static void test_mmap_unmap(int random) >> +{ >> + unsigned long iter =3D 0; >> + unsigned int i, tot =3D 1000; >> + void *ptr[tot]; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + for (i =3D 0; i < tot; i++) { >> + ptr[i] =3D mmap(NULL, 1000, PROT_READ | PROT_WRITE, >> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); >> + assert(ptr[i] !=3D MAP_FAILED); >> + } >> + if (random) >> + igt_permute_array(ptr, tot, exchange_ptr); >> + for (i =3D 0; i < tot; i++) >> + munmap(ptr[i], 1000); >> + iter++; >> + } >> + >> + printf("%8lu iter/s\n", iter / test_duration_sec); >> +} >> + >> +static void test_ptr_read(void *ptr) >> +{ >> + unsigned long iter =3D 0; >> + volatile unsigned long *p; >> + unsigned long i, loops; >> + register unsigned long v; >> + >> + loops =3D sizeof(linear) / sizeof(unsigned long) / 4; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + p =3D (unsigned long *)ptr; >> + for (i =3D 0; i < loops; i++) { >> + v =3D *p++; >> + v =3D *p++; >> + v =3D *p++; >> + v =3D *p++; >> + } >> + iter++; >> + } >> + >> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000= 000); >> +} >> + >> +static void test_ptr_write(void *ptr) >> +{ >> + unsigned long iter =3D 0; >> + volatile unsigned long *p; >> + register unsigned long i, loops; >> + >> + loops =3D sizeof(linear) / sizeof(unsigned long) / 4; >> + >> + start_test(test_duration_sec); >> + >> + while (run_test) { >> + p =3D (unsigned long *)ptr; >> + for (i =3D 0; i < loops; i++) { >> + *p++ =3D i; >> + *p++ =3D i; >> + *p++ =3D i; >> + *p++ =3D i; >> + } >> + iter++; >> + } >> + >> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000= 000); >> +} >> + >> +static void test_impact(int fd) >> +{ >> + unsigned int total =3D sizeof(nr_bos) / sizeof(nr_bos[0]); >> + unsigned int subtest, i; >> + uint32_t handles[nr_bos[total-1]]; >> + void *ptr; >> + char buffer[sizeof(linear)]; >> + >> + for (subtest =3D 0; subtest < total; subtest++) { >> + for (i =3D 0; i < nr_bos[subtest]; i++) >> + handles[i] =3D create_userptr_bo(fd, sizeof(linear)); >> + >> + if (nr_bos[subtest] > 0) >> + ptr =3D get_handle_ptr(handles[0]); >> + else >> + ptr =3D buffer; >> + >> + printf("ptr-read, %5u bos =3D ", nr_bos[subtest]); >> + test_ptr_read(ptr); >> + >> + printf("ptr-write %5u bos =3D ", nr_bos[subtest]); >> + test_ptr_write(ptr); >> + >> + printf("malloc-free, %5u bos =3D ", nr_bos[subtest]); >> + test_malloc_free(0); >> + printf("malloc-free-random %5u bos =3D ", nr_bos[subtest]); >> + test_malloc_free(1); >> + >> + printf("malloc-realloc-free, %5u bos =3D ", nr_bos[subtest]); >> + test_malloc_realloc_free(0); >> + printf("malloc-realloc-free-random, %5u bos =3D ", nr_bos[subtest]); >> + test_malloc_realloc_free(1); >> + >> + printf("mmap-unmap, %5u bos =3D ", nr_bos[subtest]); >> + test_mmap_unmap(0); >> + printf("mmap-unmap-random, %5u bos =3D ", nr_bos[subtest]); >> + test_mmap_unmap(1); >> + >> + for (i =3D 0; i < nr_bos[subtest]; i++) >> + free_userptr_bo(fd, handles[i]); >> + } >> +} >> + >> +static void test_single(int fd) >> +{ >> + char *ptr, *bo_ptr; >> + uint32_t handle =3D 0; >> + unsigned long iter =3D 0; >> + int ret; >> + unsigned long map_size =3D sizeof(linear) + PAGE_SIZE - 1; >> + >> + ptr =3D mmap(NULL, map_size, PROT_READ | PROT_WRITE, >> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); >> + assert(ptr !=3D MAP_FAILED); >> + >> + bo_ptr =3D (char *)(((unsigned long)ptr + (PAGE_SIZE - 1)) >> + & ~(PAGE_SIZE - 1)); > > You might add an ALIGN macro in this file or a suitable header. A couple = of > .c files in lib/ individually define one already. OK, will add that in the patch series. Thanks for the review! Regards, Tvrtko