From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>, intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 32/46] drm/i915: Create/destroy VM (ppGTT) for use with contexts
Date: Tue, 12 Feb 2019 11:18:16 +0000 [thread overview]
Message-ID: <d38294ae-cfaa-ed62-16d6-6398202bdde2@linux.intel.com> (raw)
In-Reply-To: <20190206130356.18771-33-chris@chris-wilson.co.uk>
On 06/02/2019 13:03, Chris Wilson wrote:
> In preparation to making the ppGTT binding for a context explicit (to
> facilitate reusing the same ppGTT between different contexts), allow the
> user to create and destroy named ppGTT.
>
> v2: Replace global barrier for swapping over the ppgtt and tlbs with a
> local context barrier (Tvrtko)
> v3: serialise with struct_mutex; it's lazy but required dammit
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 2 +
> drivers/gpu/drm/i915/i915_drv.h | 3 +
> drivers/gpu/drm/i915/i915_gem_context.c | 254 +++++++++++++++++-
> drivers/gpu/drm/i915/i915_gem_context.h | 5 +
> drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +-
> drivers/gpu/drm/i915/i915_gem_gtt.h | 16 +-
> drivers/gpu/drm/i915/selftests/huge_pages.c | 1 -
> .../gpu/drm/i915/selftests/i915_gem_context.c | 239 ++++++++++++----
> drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 1 -
> drivers/gpu/drm/i915/selftests/mock_context.c | 8 +-
> include/uapi/drm/i915_drm.h | 35 +++
> 11 files changed, 510 insertions(+), 71 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 36da8ab1e7ce..487e78094e93 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -3005,6 +3005,8 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
> DRM_IOCTL_DEF_DRV(I915_PERF_ADD_CONFIG, i915_perf_add_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(I915_PERF_REMOVE_CONFIG, i915_perf_remove_config_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> DRM_IOCTL_DEF_DRV(I915_QUERY, i915_query_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(I915_GEM_VM_CREATE, i915_gem_vm_create_ioctl, DRM_RENDER_ALLOW),
> + DRM_IOCTL_DEF_DRV(I915_GEM_VM_DESTROY, i915_gem_vm_destroy_ioctl, DRM_RENDER_ALLOW),
> };
>
> static struct drm_driver driver = {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index e554691304dc..523de3644570 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -217,6 +217,9 @@ struct drm_i915_file_private {
> } mm;
> struct idr context_idr;
>
> + struct mutex vm_lock;
> + struct idr vm_idr;
> +
> unsigned int bsd_engine;
>
> /*
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index c3f41f501276..dd49b1ef3ff2 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -110,6 +110,8 @@ static void lut_close(struct i915_gem_context *ctx)
> struct i915_vma *vma = rcu_dereference_raw(*slot);
>
> radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
> +
> + vma->open_count--;
I am finding the vma->open_count handling seriously confusing. So maybe that
means there should be a comment here at a minimum.
What is the path in the current code which brings vma->open_count back
to zero? If it is not done from lut_close, but the object is removed from
the lut_list, then the only current decrement, in i915_gem_close_object,
won't run. Surely I am missing something...
> __i915_gem_object_release_unless_active(vma->obj);
> }
> rcu_read_unlock();
> @@ -293,7 +295,7 @@ static void context_close(struct i915_gem_context *ctx)
> */
> lut_close(ctx);
> if (ctx->ppgtt)
> - i915_ppgtt_close(&ctx->ppgtt->vm);
> + i915_ppgtt_close(ctx->ppgtt);
>
> ctx->file_priv = ERR_PTR(-EBADF);
> i915_gem_context_put(ctx);
> @@ -425,6 +427,32 @@ static void __destroy_hw_context(struct i915_gem_context *ctx,
> context_close(ctx);
> }
>
> +static struct i915_hw_ppgtt *
> +__set_ppgtt(struct i915_gem_context *ctx, struct i915_hw_ppgtt *ppgtt)
> +{
> + struct i915_hw_ppgtt *old = ctx->ppgtt;
> +
> + i915_ppgtt_open(ppgtt);
> + ctx->ppgtt = i915_ppgtt_get(ppgtt);
> +
> + ctx->desc_template = default_desc_template(ctx->i915, ppgtt);
> +
> + return old;
> +}
> +
> +static void __assign_ppgtt(struct i915_gem_context *ctx,
> + struct i915_hw_ppgtt *ppgtt)
> +{
> + if (ppgtt == ctx->ppgtt)
> + return;
> +
> + ppgtt = __set_ppgtt(ctx, ppgtt);
> + if (ppgtt) {
> + i915_ppgtt_close(ppgtt);
> + i915_ppgtt_put(ppgtt);
> + }
> +}
> +
> static struct i915_gem_context *
> i915_gem_create_context(struct drm_i915_private *dev_priv,
> struct drm_i915_file_private *file_priv)
> @@ -451,8 +479,8 @@ i915_gem_create_context(struct drm_i915_private *dev_priv,
> return ERR_CAST(ppgtt);
> }
>
> - ctx->ppgtt = ppgtt;
> - ctx->desc_template = default_desc_template(dev_priv, ppgtt);
> + __assign_ppgtt(ctx, ppgtt);
> + i915_ppgtt_put(ppgtt);
Looks strange at first; then one realizes it is dropping the ref
__assign_ppgtt takes. Not sure if it wouldn't be better to just open-code
what this site needs.
> }
>
> trace_i915_context_create(ctx);
> @@ -633,19 +661,29 @@ static int context_idr_cleanup(int id, void *p, void *data)
> return 0;
> }
>
> +static int vm_idr_cleanup(int id, void *p, void *data)
> +{
> + i915_ppgtt_put(p);
> + return 0;
> +}
> +
> int i915_gem_context_open(struct drm_i915_private *i915,
> struct drm_file *file)
> {
> struct drm_i915_file_private *file_priv = file->driver_priv;
> struct i915_gem_context *ctx;
>
> + mutex_init(&file_priv->vm_lock);
> +
> idr_init(&file_priv->context_idr);
> + idr_init_base(&file_priv->vm_idr, 1);
>
> mutex_lock(&i915->drm.struct_mutex);
> ctx = i915_gem_create_context(i915, file_priv);
> mutex_unlock(&i915->drm.struct_mutex);
> if (IS_ERR(ctx)) {
> idr_destroy(&file_priv->context_idr);
> + idr_destroy(&file_priv->vm_idr);
> return PTR_ERR(ctx);
> }
>
> @@ -662,6 +700,89 @@ void i915_gem_context_close(struct drm_file *file)
>
> idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
> idr_destroy(&file_priv->context_idr);
> +
> + idr_for_each(&file_priv->vm_idr, vm_idr_cleanup, NULL);
> + idr_destroy(&file_priv->vm_idr);
The name of this function always confuses me. Should we rename it to
i915_gem_close_contexts or something?
> +
> + mutex_destroy(&file_priv->vm_lock);
> +}
> +
> +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct drm_i915_private *i915 = to_i915(dev);
> + struct drm_i915_gem_vm_control *args = data;
> + struct drm_i915_file_private *file_priv = file->driver_priv;
> + struct i915_hw_ppgtt *ppgtt;
> + int err;
> +
> + if (!HAS_FULL_PPGTT(i915))
> + return -ENODEV;
> +
> + if (args->flags)
> + return -EINVAL;
> +
> + if (args->extensions)
> + return -EINVAL;
> +
> + ppgtt = i915_ppgtt_create(i915, file_priv);
> + if (IS_ERR(ppgtt))
> + return PTR_ERR(ppgtt);
> +
> + err = mutex_lock_interruptible(&file_priv->vm_lock);
> + if (err)
> + goto err_put;
> +
> + err = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
> + mutex_unlock(&file_priv->vm_lock);
> + if (err < 0)
> + goto err_put;
> +
> + GEM_BUG_ON(err == 0); /* reserved for default/unassigned ppgtt */
> + ppgtt->user_handle = err;
> + args->id = err;
> + return 0;
> +
> +err_put:
> + i915_ppgtt_put(ppgtt);
> + return err;
> +}
> +
> +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> + struct drm_i915_file_private *file_priv = file->driver_priv;
> + struct drm_i915_gem_vm_control *args = data;
> + struct i915_hw_ppgtt *ppgtt;
> + int err;
> + u32 id;
> +
> + if (args->flags)
> + return -EINVAL;
> +
> + if (args->extensions)
> + return -EINVAL;
> +
> + id = args->id;
> + if (!id)
> + return -ENOENT;
> +
> + err = mutex_lock_interruptible(&file_priv->vm_lock);
> + if (err)
> + return err;
> +
> + ppgtt = idr_remove(&file_priv->vm_idr, id);
> + if (ppgtt) {
> + GEM_BUG_ON(!ppgtt->user_handle);
> + ppgtt->user_handle = 0;
> + }
> +
> + mutex_unlock(&file_priv->vm_lock);
> + if (!ppgtt)
> + return -ENOENT;
> +
> + i915_ppgtt_put(ppgtt);
> + return 0;
> }
>
> static struct i915_request *
> @@ -809,6 +930,120 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915,
> return 0;
> }
>
> +static int get_ppgtt(struct i915_gem_context *ctx,
> + struct drm_i915_gem_context_param *args)
> +{
> + struct drm_i915_file_private *file_priv = ctx->file_priv;
> + struct i915_hw_ppgtt *ppgtt;
> + int ret;
> +
> + if (!ctx->ppgtt)
> + return -ENODEV;
> +
> + /* XXX rcu acquire? */
> + ret = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
Is this only to serialize threads working on the same ctx? Why not do that
under the new file_priv->vm_lock instead?
> + if (ret)
> + return ret;
> +
> + ppgtt = i915_ppgtt_get(ctx->ppgtt);
> + mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> + ret = mutex_lock_interruptible(&file_priv->vm_lock);
> + if (ret)
> + goto err_put;
> +
> + if (!ppgtt->user_handle) {
> + ret = idr_alloc(&file_priv->vm_idr, ppgtt, 0, 0, GFP_KERNEL);
> + GEM_BUG_ON(!ret);
> + if (ret < 0)
> + goto err_unlock;
> +
> + ppgtt->user_handle = ret;
> + i915_ppgtt_get(ppgtt);
> + }
> +
> + args->size = 0;
> + args->value = ppgtt->user_handle;
> +
> + ret = 0;
> +err_unlock:
> + mutex_unlock(&file_priv->vm_lock);
> +err_put:
> + i915_ppgtt_put(ppgtt);
> + return ret;
> +}
> +
> +static void set_ppgtt_barrier(void *data)
> +{
> + struct i915_hw_ppgtt *old = data;
> +
> + i915_ppgtt_close(old);
> + i915_ppgtt_put(old);
> +}
> +
> +static int set_ppgtt(struct i915_gem_context *ctx,
> + struct drm_i915_gem_context_param *args)
> +{
> + struct drm_i915_file_private *file_priv = ctx->file_priv;
> + struct i915_hw_ppgtt *ppgtt, *old;
> + int err;
> +
> + if (args->size)
> + return -EINVAL;
> +
> + if (upper_32_bits(args->value))
> + return -EINVAL;
> +
> + if (!ctx->ppgtt)
> + return -ENODEV;
> +
> + err = mutex_lock_interruptible(&file_priv->vm_lock);
> + if (err)
> + return err;
> +
> + ppgtt = idr_find(&file_priv->vm_idr, args->value);
> + if (ppgtt) {
> + GEM_BUG_ON(ppgtt->user_handle != args->value);
> + i915_ppgtt_get(ppgtt);
> + }
> + mutex_unlock(&file_priv->vm_lock);
> + if (!ppgtt)
> + return -ENOENT;
> +
> + err = mutex_lock_interruptible(&ctx->i915->drm.struct_mutex);
> + if (err)
> + goto out;
> +
> + if (ppgtt == ctx->ppgtt)
> + goto unlock;
> +
> + /* Teardown the existing obj:vma cache, it will have to be rebuilt. */
> + lut_close(ctx);
Nesting issues, I guess, which is the answer to my previous question.
> +
> + old = __set_ppgtt(ctx, ppgtt);
> +
> + /*
> + * We need to flush any requests using the current ppgtt before
> + * we release it as the requests do not hold a reference themselves,
> + * only indirectly through the context.
> + */
> + err = context_barrier_task(ctx, -1, set_ppgtt_barrier, old);
But the barrier can be retired on the user interrupt with the context save
still running, no?
Regards,
Tvrtko
> + if (err) {
> + ctx->ppgtt = old;
> + ctx->desc_template = default_desc_template(ctx->i915, old);
> +
> + i915_ppgtt_close(ppgtt);
> + i915_ppgtt_put(ppgtt);
> + }
> +
> +unlock:
> + mutex_unlock(&ctx->i915->drm.struct_mutex);
> +
> +out:
> + i915_ppgtt_put(ppgtt);
> + return err;
> +}
> +
> static bool client_is_banned(struct drm_i915_file_private *file_priv)
> {
> return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
> @@ -979,6 +1214,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
> case I915_CONTEXT_PARAM_SSEU:
> ret = get_sseu(ctx, args);
> break;
> + case I915_CONTEXT_PARAM_VM:
> + ret = get_ppgtt(ctx, args);
> + break;
> default:
> ret = -EINVAL;
> break;
> @@ -1276,9 +1514,6 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> return -ENOENT;
>
> switch (args->param) {
> - case I915_CONTEXT_PARAM_BAN_PERIOD:
> - ret = -EINVAL;
> - break;
> case I915_CONTEXT_PARAM_NO_ZEROMAP:
> if (args->size)
> ret = -EINVAL;
> @@ -1325,9 +1560,16 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
> I915_USER_PRIORITY(priority);
> }
> break;
> +
> case I915_CONTEXT_PARAM_SSEU:
> ret = set_sseu(ctx, args);
> break;
> +
> + case I915_CONTEXT_PARAM_VM:
> + ret = set_ppgtt(ctx, args);
> + break;
> +
> + case I915_CONTEXT_PARAM_BAN_PERIOD:
> default:
> ret = -EINVAL;
> break;
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
> index ab89c7501408..c5a6cb10dbda 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.h
> +++ b/drivers/gpu/drm/i915/i915_gem_context.h
> @@ -365,6 +365,11 @@ void i915_gem_context_release(struct kref *ctx_ref);
> struct i915_gem_context *
> i915_gem_context_create_gvt(struct drm_device *dev);
>
> +int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file);
> +int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file);
> +
> int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
> struct drm_file *file);
> int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index d646d37eec2f..ccf10306b1f5 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2103,10 +2103,21 @@ i915_ppgtt_create(struct drm_i915_private *i915,
> return ppgtt;
> }
>
> -void i915_ppgtt_close(struct i915_address_space *vm)
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt)
> {
> - GEM_BUG_ON(vm->closed);
> - vm->closed = true;
> + GEM_BUG_ON(ppgtt->vm.closed);
> +
> + ppgtt->open_count++;
> +}
> +
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt)
> +{
> + GEM_BUG_ON(!ppgtt->open_count);
> + if (--ppgtt->open_count)
> + return;
> +
> + GEM_BUG_ON(ppgtt->vm.closed);
> + ppgtt->vm.closed = true;
> }
>
> static void ppgtt_destroy_vma(struct i915_address_space *vm)
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 03ade71b8d9a..bb750318f52a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -391,11 +391,15 @@ struct i915_hw_ppgtt {
> struct kref ref;
>
> unsigned long pd_dirty_rings;
> + unsigned int open_count;
> +
> union {
> struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */
> struct i915_page_directory_pointer pdp; /* GEN8+ */
> struct i915_page_directory pd; /* GEN6-7 */
> };
> +
> + u32 user_handle;
> };
>
> struct gen6_hw_ppgtt {
> @@ -606,12 +610,16 @@ int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv);
> void i915_ppgtt_release(struct kref *kref);
> struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
> struct drm_i915_file_private *fpriv);
> -void i915_ppgtt_close(struct i915_address_space *vm);
> -static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
> +
> +void i915_ppgtt_open(struct i915_hw_ppgtt *ppgtt);
> +void i915_ppgtt_close(struct i915_hw_ppgtt *ppgtt);
> +
> +static inline struct i915_hw_ppgtt *i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
> {
> - if (ppgtt)
> - kref_get(&ppgtt->ref);
> + kref_get(&ppgtt->ref);
> + return ppgtt;
> }
> +
> static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
> {
> if (ppgtt)
> diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c
> index a9a2fa35876f..a7ee8e97bcee 100644
> --- a/drivers/gpu/drm/i915/selftests/huge_pages.c
> +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c
> @@ -1734,7 +1734,6 @@ int i915_gem_huge_page_mock_selftests(void)
> err = i915_subtests(tests, ppgtt);
>
> out_close:
> - i915_ppgtt_close(&ppgtt->vm);
> i915_ppgtt_put(ppgtt);
>
> out_unlock:
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> index 4b6df1c55345..a76a4f6f67e4 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -372,7 +372,8 @@ static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
> return 0;
> }
>
> -static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
> +static noinline int cpu_check(struct drm_i915_gem_object *obj,
> + unsigned int idx, unsigned int max)
> {
> unsigned int n, m, needs_flush;
> int err;
> @@ -390,8 +391,10 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>
> for (m = 0; m < max; m++) {
> if (map[m] != m) {
> - pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> - n, m, map[m], m);
> + pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
> + __builtin_return_address(0), idx,
> + n, real_page_count(obj), m, max,
> + map[m], m);
> err = -EINVAL;
> goto out_unmap;
> }
> @@ -399,8 +402,9 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
>
> for (; m < DW_PER_PAGE; m++) {
> if (map[m] != STACK_MAGIC) {
> - pr_err("Invalid value at page %d, offset %d: found %x expected %x\n",
> - n, m, map[m], STACK_MAGIC);
> + pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
> + __builtin_return_address(0), idx, n, m,
> + map[m], STACK_MAGIC);
> err = -EINVAL;
> goto out_unmap;
> }
> @@ -478,12 +482,8 @@ static unsigned long max_dwords(struct drm_i915_gem_object *obj)
> static int igt_ctx_exec(void *arg)
> {
> struct drm_i915_private *i915 = arg;
> - struct drm_i915_gem_object *obj = NULL;
> - unsigned long ncontexts, ndwords, dw;
> - struct igt_live_test t;
> - struct drm_file *file;
> - IGT_TIMEOUT(end_time);
> - LIST_HEAD(objects);
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> int err = -ENODEV;
>
> /*
> @@ -495,38 +495,42 @@ static int igt_ctx_exec(void *arg)
> if (!DRIVER_CAPS(i915)->has_logical_contexts)
> return 0;
>
> - file = mock_file(i915);
> - if (IS_ERR(file))
> - return PTR_ERR(file);
> + for_each_engine(engine, i915, id) {
> + struct drm_i915_gem_object *obj = NULL;
> + unsigned long ncontexts, ndwords, dw;
> + struct igt_live_test t;
> + struct drm_file *file;
> + IGT_TIMEOUT(end_time);
> + LIST_HEAD(objects);
>
> - mutex_lock(&i915->drm.struct_mutex);
> + if (!intel_engine_can_store_dword(engine))
> + continue;
>
> - err = igt_live_test_begin(&t, i915, __func__, "");
> - if (err)
> - goto out_unlock;
> + if (!engine->context_size)
> + continue; /* No logical context support in HW */
>
> - ncontexts = 0;
> - ndwords = 0;
> - dw = 0;
> - while (!time_after(jiffies, end_time)) {
> - struct intel_engine_cs *engine;
> - struct i915_gem_context *ctx;
> - unsigned int id;
> + file = mock_file(i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
>
> - ctx = i915_gem_create_context(i915, file->driver_priv);
> - if (IS_ERR(ctx)) {
> - err = PTR_ERR(ctx);
> + mutex_lock(&i915->drm.struct_mutex);
> +
> + err = igt_live_test_begin(&t, i915, __func__, engine->name);
> + if (err)
> goto out_unlock;
> - }
>
> - for_each_engine(engine, i915, id) {
> + ncontexts = 0;
> + ndwords = 0;
> + dw = 0;
> + while (!time_after(jiffies, end_time)) {
> + struct i915_gem_context *ctx;
> intel_wakeref_t wakeref;
>
> - if (!engine->context_size)
> - continue; /* No logical context support in HW */
> -
> - if (!intel_engine_can_store_dword(engine))
> - continue;
> + ctx = i915_gem_create_context(i915, file->driver_priv);
> + if (IS_ERR(ctx)) {
> + err = PTR_ERR(ctx);
> + goto out_unlock;
> + }
>
> if (!obj) {
> obj = create_test_object(ctx, file, &objects);
> @@ -536,7 +540,6 @@ static int igt_ctx_exec(void *arg)
> }
> }
>
> - err = 0;
> with_intel_runtime_pm(i915, wakeref)
> err = gpu_fill(obj, ctx, engine, dw);
> if (err) {
> @@ -551,32 +554,158 @@ static int igt_ctx_exec(void *arg)
> obj = NULL;
> dw = 0;
> }
> +
> ndwords++;
> + ncontexts++;
> }
> - ncontexts++;
> +
> + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> + ncontexts, engine->name, ndwords);
> +
> + ncontexts = dw = 0;
> + list_for_each_entry(obj, &objects, st_link) {
> + unsigned int rem =
> + min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +
> + err = cpu_check(obj, ncontexts++, rem);
> + if (err)
> + break;
> +
> + dw += rem;
> + }
> +
> +out_unlock:
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + mutex_unlock(&i915->drm.struct_mutex);
> +
> + mock_file_free(i915, file);
> + if (err)
> + return err;
> }
> - pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n",
> - ncontexts, RUNTIME_INFO(i915)->num_rings, ndwords);
>
> - dw = 0;
> - list_for_each_entry(obj, &objects, st_link) {
> - unsigned int rem =
> - min_t(unsigned int, ndwords - dw, max_dwords(obj));
> + return 0;
> +}
> +
> +static int igt_shared_ctx_exec(void *arg)
> +{
> + struct drm_i915_private *i915 = arg;
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> + int err = -ENODEV;
> +
> + /*
> + * Create a few different contexts with the same mm and write
> + * through each ctx using the GPU making sure those writes end
> + * up in the expected pages of our obj.
> + */
> +
> + for_each_engine(engine, i915, id) {
> + unsigned long ncontexts, ndwords, dw;
> + struct drm_i915_gem_object *obj = NULL;
> + struct i915_gem_context *ctx = NULL;
> + struct i915_gem_context *parent;
> + struct igt_live_test t;
> + struct drm_file *file;
> + IGT_TIMEOUT(end_time);
> + LIST_HEAD(objects);
>
> - err = cpu_check(obj, rem);
> + if (!intel_engine_can_store_dword(engine))
> + continue;
> +
> + file = mock_file(i915);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> +
> + mutex_lock(&i915->drm.struct_mutex);
> +
> + err = igt_live_test_begin(&t, i915, __func__, engine->name);
> if (err)
> - break;
> + goto out_unlock;
>
> - dw += rem;
> - }
> + parent = i915_gem_create_context(i915, file->driver_priv);
> + if (IS_ERR(parent)) {
> + err = PTR_ERR(parent);
> + if (err == -ENODEV) /* no logical ctx support */
> + err = 0;
> + goto out_unlock;
> + }
> +
> + if (!parent->ppgtt) {
> + err = 0;
> + goto out_unlock;
> + }
> +
> + ncontexts = 0;
> + ndwords = 0;
> + dw = 0;
> + while (!time_after(jiffies, end_time)) {
> + intel_wakeref_t wakeref;
> +
> + if (ctx)
> + __destroy_hw_context(ctx, file->driver_priv);
> +
> + ctx = i915_gem_create_context(i915, file->driver_priv);
> + if (IS_ERR(ctx)) {
> + err = PTR_ERR(ctx);
> + goto out_unlock;
> + }
> +
> + __assign_ppgtt(ctx, parent->ppgtt);
> +
> + if (!obj) {
> + obj = create_test_object(parent, file, &objects);
> + if (IS_ERR(obj)) {
> + err = PTR_ERR(obj);
> + goto out_unlock;
> + }
> + }
> +
> + err = 0;
> + with_intel_runtime_pm(i915, wakeref)
> + err = gpu_fill(obj, ctx, engine, dw);
> + if (err) {
> + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
> + ndwords, dw, max_dwords(obj),
> + engine->name, ctx->hw_id,
> + yesno(!!ctx->ppgtt), err);
> + goto out_unlock;
> + }
> +
> + if (++dw == max_dwords(obj)) {
> + obj = NULL;
> + dw = 0;
> + }
> +
> + ndwords++;
> + ncontexts++;
> + }
> + pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
> + ncontexts, engine->name, ndwords);
> +
> + ncontexts = dw = 0;
> + list_for_each_entry(obj, &objects, st_link) {
> + unsigned int rem =
> + min_t(unsigned int, ndwords - dw, max_dwords(obj));
> +
> + err = cpu_check(obj, ncontexts++, rem);
> + if (err)
> + break;
> +
> + dw += rem;
> + }
>
> out_unlock:
> - if (igt_live_test_end(&t))
> - err = -EIO;
> - mutex_unlock(&i915->drm.struct_mutex);
> + if (igt_live_test_end(&t))
> + err = -EIO;
> + mutex_unlock(&i915->drm.struct_mutex);
>
> - mock_file_free(i915, file);
> - return err;
> + mock_file_free(i915, file);
> + if (err)
> + return err;
> + }
> +
> + return 0;
> }
>
> static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
> @@ -1048,7 +1177,7 @@ static int igt_ctx_readonly(void *arg)
> struct drm_i915_gem_object *obj = NULL;
> struct i915_gem_context *ctx;
> struct i915_hw_ppgtt *ppgtt;
> - unsigned long ndwords, dw;
> + unsigned long idx, ndwords, dw;
> struct igt_live_test t;
> struct drm_file *file;
> I915_RND_STATE(prng);
> @@ -1129,6 +1258,7 @@ static int igt_ctx_readonly(void *arg)
> ndwords, RUNTIME_INFO(i915)->num_rings);
>
> dw = 0;
> + idx = 0;
> list_for_each_entry(obj, &objects, st_link) {
> unsigned int rem =
> min_t(unsigned int, ndwords - dw, max_dwords(obj));
> @@ -1138,7 +1268,7 @@ static int igt_ctx_readonly(void *arg)
> if (i915_gem_object_is_readonly(obj))
> num_writes = 0;
>
> - err = cpu_check(obj, num_writes);
> + err = cpu_check(obj, idx++, num_writes);
> if (err)
> break;
>
> @@ -1723,6 +1853,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
> SUBTEST(igt_ctx_exec),
> SUBTEST(igt_ctx_readonly),
> SUBTEST(igt_ctx_sseu),
> + SUBTEST(igt_shared_ctx_exec),
> SUBTEST(igt_vm_isolation),
> };
>
> diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> index 3850ef4a5ec8..08a8f3d20854 100644
> --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
> @@ -1020,7 +1020,6 @@ static int exercise_ppgtt(struct drm_i915_private *dev_priv,
>
> err = func(dev_priv, &ppgtt->vm, 0, ppgtt->vm.total, end_time);
>
> - i915_ppgtt_close(&ppgtt->vm);
> i915_ppgtt_put(ppgtt);
> out_unlock:
> mutex_unlock(&dev_priv->drm.struct_mutex);
> diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c
> index 1b5073a362eb..2189b606ca41 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_context.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_context.c
> @@ -54,13 +54,17 @@ mock_context(struct drm_i915_private *i915,
> goto err_handles;
>
> if (name) {
> + struct i915_hw_ppgtt *ppgtt;
> +
> ctx->name = kstrdup(name, GFP_KERNEL);
> if (!ctx->name)
> goto err_put;
>
> - ctx->ppgtt = mock_ppgtt(i915, name);
> - if (!ctx->ppgtt)
> + ppgtt = mock_ppgtt(i915, name);
> + if (!ppgtt)
> goto err_put;
> +
> + __set_ppgtt(ctx, ppgtt);
> }
>
> return ctx;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index be2fcdf3ba90..2cd79639d6b5 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -339,6 +339,8 @@ typedef struct _drm_i915_sarea {
> #define DRM_I915_PERF_ADD_CONFIG 0x37
> #define DRM_I915_PERF_REMOVE_CONFIG 0x38
> #define DRM_I915_QUERY 0x39
> +#define DRM_I915_GEM_VM_CREATE 0x3a
> +#define DRM_I915_GEM_VM_DESTROY 0x3b
>
> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -397,6 +399,8 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
> #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
> #define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
> +#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control)
> +#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control)
>
> /* Allow drivers to submit batchbuffers directly to hardware, relying
> * on the security mechanisms provided by hardware.
> @@ -1444,6 +1448,26 @@ struct drm_i915_gem_context_destroy {
> __u32 pad;
> };
>
> +/*
> + * DRM_I915_GEM_VM_CREATE -
> + *
> + * Create a new virtual memory address space (ppGTT) for use within a context
> + * on the same file. Extensions can be provided to configure exactly how the
> + * address space is setup upon creation.
> + *
> + * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is
> + * returned.
> + *
> + * DRM_I915_GEM_VM_DESTROY -
> + *
> + * Destroys a previously created VM id.
> + */
> +struct drm_i915_gem_vm_control {
> + __u64 extensions;
> + __u32 flags;
> + __u32 id;
> +};
> +
> struct drm_i915_reg_read {
> /*
> * Register offset.
> @@ -1513,6 +1537,17 @@ struct drm_i915_gem_context_param {
> * drm_i915_gem_context_param_sseu.
> */
> #define I915_CONTEXT_PARAM_SSEU 0x7
> +
> + /*
> + * The id of the associated virtual memory address space (ppGTT) of
> + * this context. Can be retrieved and passed to another context
> + * (on the same fd) for both to use the same ppGTT and so share
> + * address layouts, and avoid reloading the page tables on context
> + * switches between themselves.
> + *
> + * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY.
> + */
> +#define I915_CONTEXT_PARAM_VM 0x8
> __u64 value;
> };
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
next prev parent reply other threads:[~2019-02-12 11:18 UTC|newest]
Thread overview: 97+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-02-06 13:03 The road to load balancing Chris Wilson
2019-02-06 13:03 ` [PATCH 01/46] drm/i915: Hack and slash, throttle execbuffer hogs Chris Wilson
2019-02-06 13:03 ` [PATCH 02/46] drm/i915: Revoke mmaps and prevent access to fence registers across reset Chris Wilson
2019-02-06 15:56 ` Mika Kuoppala
2019-02-06 16:08 ` Chris Wilson
2019-02-06 16:18 ` Chris Wilson
2019-02-26 19:53 ` Rodrigo Vivi
2019-02-26 20:27 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 03/46] drm/i915: Force the GPU reset upon wedging Chris Wilson
2019-02-06 13:03 ` [PATCH 04/46] drm/i915: Uninterruptibly drain the timelines on unwedging Chris Wilson
2019-02-06 13:03 ` [PATCH 05/46] drm/i915: Wait for old resets before applying debugfs/i915_wedged Chris Wilson
2019-02-06 13:03 ` [PATCH 06/46] drm/i915: Serialise resets with wedging Chris Wilson
2019-02-06 13:03 ` [PATCH 07/46] drm/i915: Don't claim an unstarted request was guilty Chris Wilson
2019-02-06 13:03 ` [PATCH 08/46] drm/i915/execlists: Suppress mere WAIT preemption Chris Wilson
2019-02-11 11:19 ` Tvrtko Ursulin
2019-02-19 10:22 ` Matthew Auld
2019-02-19 10:34 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 09/46] drm/i915/execlists: Suppress redundant preemption Chris Wilson
2019-02-06 13:03 ` [PATCH 10/46] drm/i915: Make request allocation caches global Chris Wilson
2019-02-11 11:43 ` Tvrtko Ursulin
2019-02-11 12:40 ` Chris Wilson
2019-02-11 17:02 ` Tvrtko Ursulin
2019-02-12 11:51 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 11/46] drm/i915: Keep timeline HWSP allocated until idle across the system Chris Wilson
2019-02-06 13:03 ` [PATCH 12/46] drm/i915/execlists: Refactor out can_merge_rq() Chris Wilson
2019-02-06 13:03 ` [PATCH 13/46] drm/i915: Compute the global scheduler caps Chris Wilson
2019-02-11 12:24 ` Tvrtko Ursulin
2019-02-11 12:33 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 14/46] drm/i915: Use HW semaphores for inter-engine synchronisation on gen8+ Chris Wilson
2019-02-06 13:03 ` [PATCH 15/46] drm/i915: Prioritise non-busywait semaphore workloads Chris Wilson
2019-02-06 13:03 ` [PATCH 16/46] drm/i915: Show support for accurate sw PMU busyness tracking Chris Wilson
2019-02-06 13:03 ` [PATCH 17/46] drm/i915: Apply rps waitboosting for dma_fence_wait_timeout() Chris Wilson
2019-02-11 18:06 ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 18/46] drm/i915: Replace global_seqno with a hangcheck heartbeat seqno Chris Wilson
2019-02-11 12:40 ` Tvrtko Ursulin
2019-02-11 12:44 ` Chris Wilson
2019-02-11 16:56 ` Tvrtko Ursulin
2019-02-12 13:36 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 19/46] drm/i915/pmu: Always sample an active ringbuffer Chris Wilson
2019-02-11 18:18 ` Tvrtko Ursulin
2019-02-12 13:40 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 20/46] drm/i915: Remove access to global seqno in the HWSP Chris Wilson
2019-02-11 18:22 ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 21/46] drm/i915: Remove i915_request.global_seqno Chris Wilson
2019-02-11 18:44 ` Tvrtko Ursulin
2019-02-12 13:45 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 22/46] drm/i915: Force GPU idle on suspend Chris Wilson
2019-02-06 13:03 ` [PATCH 23/46] drm/i915/selftests: Improve switch-to-kernel-context checking Chris Wilson
2019-02-06 13:03 ` [PATCH 24/46] drm/i915: Do a synchronous switch-to-kernel-context on idling Chris Wilson
2019-02-21 19:48 ` Daniele Ceraolo Spurio
2019-02-21 21:17 ` Chris Wilson
2019-02-21 21:31 ` Daniele Ceraolo Spurio
2019-02-21 21:42 ` Chris Wilson
2019-02-21 22:53 ` Daniele Ceraolo Spurio
2019-02-21 23:25 ` Chris Wilson
2019-02-22 0:29 ` Daniele Ceraolo Spurio
2019-02-06 13:03 ` [PATCH 25/46] drm/i915: Store the BIT(engine->id) as the engine's mask Chris Wilson
2019-02-11 18:51 ` Tvrtko Ursulin
2019-02-12 13:51 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 26/46] drm/i915: Refactor common code to load initial power context Chris Wilson
2019-02-06 13:03 ` [PATCH 27/46] drm/i915: Reduce presumption of request ordering for barriers Chris Wilson
2019-02-06 13:03 ` [PATCH 28/46] drm/i915: Remove has-kernel-context Chris Wilson
2019-02-06 13:03 ` [PATCH 29/46] drm/i915: Introduce the i915_user_extension_method Chris Wilson
2019-02-11 19:00 ` Tvrtko Ursulin
2019-02-12 13:56 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 30/46] drm/i915: Track active engines within a context Chris Wilson
2019-02-11 19:11 ` Tvrtko Ursulin
2019-02-12 13:59 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 31/46] drm/i915: Introduce a context barrier callback Chris Wilson
2019-02-06 13:03 ` [PATCH 32/46] drm/i915: Create/destroy VM (ppGTT) for use with contexts Chris Wilson
2019-02-12 11:18 ` Tvrtko Ursulin [this message]
2019-02-12 14:11 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 33/46] drm/i915: Extend CONTEXT_CREATE to set parameters upon construction Chris Wilson
2019-02-12 13:43 ` Tvrtko Ursulin
2019-02-06 13:03 ` [PATCH 34/46] drm/i915: Allow contexts to share a single timeline across all engines Chris Wilson
2019-02-06 13:03 ` [PATCH 35/46] drm/i915: Fix I915_EXEC_RING_MASK Chris Wilson
2019-02-06 13:03 ` [PATCH 36/46] drm/i915: Remove last traces of exec-id (GEM_BUSY) Chris Wilson
2019-02-06 13:03 ` [PATCH 37/46] drm/i915: Re-arrange execbuf so context is known before engine Chris Wilson
2019-02-06 13:03 ` [PATCH 38/46] drm/i915: Allow a context to define its set of engines Chris Wilson
2019-02-25 10:41 ` Tvrtko Ursulin
2019-02-25 10:47 ` Chris Wilson
2019-02-06 13:03 ` [PATCH 39/46] drm/i915: Extend I915_CONTEXT_PARAM_SSEU to support local ctx->engine[] Chris Wilson
2019-02-06 13:03 ` [PATCH 40/46] drm/i915: Pass around the intel_context Chris Wilson
2019-02-06 13:03 ` [PATCH 41/46] drm/i915: Split struct intel_context definition to its own header Chris Wilson
2019-02-06 13:03 ` [PATCH 42/46] drm/i915: Move over to intel_context_lookup() Chris Wilson
2019-02-06 14:27 ` [PATCH] " Chris Wilson
2019-02-06 13:03 ` [PATCH 43/46] drm/i915: Load balancing across a virtual engine Chris Wilson
2019-02-06 13:03 ` [PATCH 44/46] drm/i915: Extend execution fence to support a callback Chris Wilson
2019-02-06 13:03 ` [PATCH 45/46] drm/i915/execlists: Virtual engine bonding Chris Wilson
2019-02-06 13:03 ` [PATCH 46/46] drm/i915: Allow specification of parallel execbuf Chris Wilson
2019-02-06 13:52 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Hack and slash, throttle execbuffer hogs Patchwork
2019-02-06 14:09 ` ✗ Fi.CI.BAT: failure " Patchwork
2019-02-06 14:11 ` ✗ Fi.CI.SPARSE: warning " Patchwork
2019-02-06 14:37 ` ✗ Fi.CI.CHECKPATCH: warning for series starting with [01/46] drm/i915: Hack and slash, throttle execbuffer hogs (rev2) Patchwork
2019-02-06 14:55 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-02-06 14:56 ` ✓ Fi.CI.BAT: success " Patchwork
2019-02-06 16:18 ` ✗ Fi.CI.IGT: failure " Patchwork
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=d38294ae-cfaa-ed62-16d6-6398202bdde2@linux.intel.com \
--to=tvrtko.ursulin@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox