dri-devel.lists.freedesktop.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 4/4] amdgpu: use sync file for shared semaphores
       [not found] ` <20170314005054.7487-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-14  0:50   ` Dave Airlie
       [not found]     ` <20170314005054.7487-6-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Dave Airlie @ 2017-03-14  0:50 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Dave Airlie <airlied@redhat.com>

This creates a new interface for amdgpu with ioctls to
create/destroy/import and export shared semaphores using
sem object backed by the sync_file code. The semaphores
are not installed as fd (except for export), but rather
like other driver internal objects in an idr. The idr
holds the initial reference on the sync file.

The command submission interface is enhanced with two new
chunks, one for semaphore waiting, one for semaphore signalling
and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD,
with input from Christian Konig on what things should look like.

NOTE: this interface addition needs a version bump to expose
it to userspace.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  28 +++++
 6 files changed, 322 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad..404bcba 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b9135..4ed8811 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -702,6 +702,8 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
+	spinlock_t		sem_handles_lock;
+	struct idr		sem_handles;
 };
 
 /*
@@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		       uint64_t addr, struct amdgpu_bo **bo);
 int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
 
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp);
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence);
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle);
 #include "amdgpu_object.h"
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4671432..80fc94b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SEM_WAIT:
+		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
 			break;
 
 		default:
@@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
+				       struct amdgpu_cs_parser *p,
+				       struct amdgpu_cs_chunk *chunk)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
+					       deps[i].handle);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 				  struct amdgpu_cs_parser *p)
 {
@@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			r = amdgpu_process_fence_dep(adev, p, chunk);
 			if (r)
 				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
+			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
 	return 0;
 }
 
+static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
+					 struct amdgpu_cs_chunk *chunk,
+					 struct dma_fence *fence)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
+						 fence);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
+
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
+			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
+			if (r)
+				return r;
+		}
+	}
+	return 0;
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
@@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
-	return 0;
+	return amdgpu_cs_post_dependencies(p);
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 61d94c7..013aed1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	mutex_init(&fpriv->bo_list_lock);
 	idr_init(&fpriv->bo_list_handles);
 
+	spin_lock_init(&fpriv->sem_handles_lock);
+	idr_init(&fpriv->sem_handles);
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
 	file_priv->driver_priv = fpriv;
@@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
+	struct amdgpu_sem *sem;
 	int handle;
 
 	if (!fpriv)
@@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	idr_destroy(&fpriv->bo_list_handles);
 	mutex_destroy(&fpriv->bo_list_lock);
 
+	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
+		amdgpu_sem_destroy(fpriv, handle);
+	idr_destroy(&fpriv->sem_handles);
+
 	kfree(fpriv);
 	file_priv->driver_priv = NULL;
 
@@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
new file mode 100644
index 0000000..066520a
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chunming Zhou <david1.zhou@amd.com>
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/anon_inodes.h>
+#include <linux/sync_file.h>
+#include "amdgpu.h"
+#include <drm/drmP.h>
+
+static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file;
+
+	spin_lock(&fpriv->sem_handles_lock);
+
+	/* Check if we currently have a reference on the object */
+	sync_file = idr_find(&fpriv->sem_handles, handle);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	return sync_file;
+}
+
+static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_alloc();
+	int ret;
+
+	if (!sync_file)
+		return -ENOMEM;
+
+	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
+
+	/* we get a file reference and we use that in the idr. */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	if (ret < 0)
+		return ret;
+
+	*handle = ret;
+	return 0;
+}
+
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return;
+
+	spin_lock(&fpriv->sem_handles_lock);
+	idr_remove(&fpriv->sem_handles, handle);
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	fput(sync_file->file);
+}
+
+
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence)
+{
+	struct sync_file *sync_file;
+	struct dma_fence *old_fence;
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	old_fence = sync_file_replace_fence(sync_file, fence);
+	dma_fence_put(old_fence);
+	return 0;
+}
+
+static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
+				       int fd, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_fdget(fd);
+	int ret;
+
+	if (!sync_file)
+		return -EINVAL;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	fput(sync_file->file);
+	if (ret < 0)
+		goto err_out;
+
+	*handle = ret;
+	return 0;
+err_out:
+	return ret;
+
+}
+
+static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
+			     u32 handle, int *fd)
+{
+	struct sync_file *sync_file;
+	int ret;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	ret = get_unused_fd_flags(O_CLOEXEC);
+	if (ret < 0)
+		goto err_put_file;
+
+	fd_install(ret, sync_file->file);
+
+	*fd = ret;
+	return 0;
+err_put_file:
+	return ret;
+}
+
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle)
+{
+	int r;
+	struct sync_file *sync_file;
+	struct dma_fence *fence;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	fence = sync_file_replace_fence(sync_file, NULL);
+	r = amdgpu_sync_fence(adev, sync, fence);
+	dma_fence_put(fence);
+
+	return r;
+}
+
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp)
+{
+	union drm_amdgpu_sem *args = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	int r = 0;
+
+	switch (args->in.op) {
+	case AMDGPU_SEM_OP_CREATE_SEM:
+		r = amdgpu_sem_create(fpriv, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_IMPORT_SEM:
+		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_EXPORT_SEM:
+		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
+		break;
+	case AMDGPU_SEM_OP_DESTROY_SEM:
+		amdgpu_sem_destroy(fpriv, args->in.handle);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 5797283..646b103 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -51,6 +51,7 @@ extern "C" {
 #define DRM_AMDGPU_GEM_OP		0x10
 #define DRM_AMDGPU_GEM_USERPTR		0x11
 #define DRM_AMDGPU_WAIT_FENCES		0x12
+#define DRM_AMDGPU_SEM                  0x13
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -65,6 +66,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
 	struct drm_amdgpu_wait_fences_out out;
 };
 
+#define AMDGPU_SEM_OP_CREATE_SEM 0
+#define AMDGPU_SEM_OP_IMPORT_SEM 1
+#define AMDGPU_SEM_OP_EXPORT_SEM 2
+#define AMDGPU_SEM_OP_DESTROY_SEM 3
+
+struct drm_amdgpu_sem_in {
+	__u32 op;
+	__u32 handle;
+};
+
+struct drm_amdgpu_sem_out {
+	__u32 fd;
+	__u32 handle;
+};
+
+union drm_amdgpu_sem {
+	struct drm_amdgpu_sem_in in;
+	struct drm_amdgpu_sem_out out;
+};
+
 #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
 #define AMDGPU_GEM_OP_SET_PLACEMENT		1
 
@@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_IB		0x01
 #define AMDGPU_CHUNK_ID_FENCE		0x02
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
+#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
+#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
@@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
 	__u32 offset;
 };
 
+struct drm_amdgpu_cs_chunk_sem {
+	__u32 handle;
+};
+
 struct drm_amdgpu_cs_chunk_data {
 	union {
 		struct drm_amdgpu_cs_chunk_ib		ib_data;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/4] amdgpu: use sync file for shared semaphores
       [not found] ` <20170315042749.13259-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-15  4:27   ` Dave Airlie
  0 siblings, 0 replies; 14+ messages in thread
From: Dave Airlie @ 2017-03-15  4:27 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Dave Airlie <airlied@redhat.com>

This creates a new interface for amdgpu with ioctls to
create/destroy/import and export shared semaphores using
sem object backed by the sync_file code. The semaphores
are not installed as fd (except for export), but rather
like other driver internal objects in an idr. The idr
holds the initial reference on the sync file.

The command submission interface is enhanced with two new
chunks, one for semaphore waiting, one for semaphore signalling
and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD,
with input from Christian Konig on what things should look like.

NOTE: this interface addition needs a version bump to expose
it to userspace.

v1.1: keep file reference on import.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 203 ++++++++++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  28 +++++
 6 files changed, 321 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad..404bcba 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b9135..4ed8811 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -702,6 +702,8 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
+	spinlock_t		sem_handles_lock;
+	struct idr		sem_handles;
 };
 
 /*
@@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		       uint64_t addr, struct amdgpu_bo **bo);
 int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
 
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp);
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence);
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle);
 #include "amdgpu_object.h"
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4671432..80fc94b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SEM_WAIT:
+		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
 			break;
 
 		default:
@@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
+				       struct amdgpu_cs_parser *p,
+				       struct amdgpu_cs_chunk *chunk)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
+					       deps[i].handle);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 				  struct amdgpu_cs_parser *p)
 {
@@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			r = amdgpu_process_fence_dep(adev, p, chunk);
 			if (r)
 				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
+			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
 	return 0;
 }
 
+static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
+					 struct amdgpu_cs_chunk *chunk,
+					 struct dma_fence *fence)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
+						 fence);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
+
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
+			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
+			if (r)
+				return r;
+		}
+	}
+	return 0;
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
@@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
-	return 0;
+	return amdgpu_cs_post_dependencies(p);
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 61d94c7..013aed1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	mutex_init(&fpriv->bo_list_lock);
 	idr_init(&fpriv->bo_list_handles);
 
+	spin_lock_init(&fpriv->sem_handles_lock);
+	idr_init(&fpriv->sem_handles);
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
 	file_priv->driver_priv = fpriv;
@@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
+	struct amdgpu_sem *sem;
 	int handle;
 
 	if (!fpriv)
@@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	idr_destroy(&fpriv->bo_list_handles);
 	mutex_destroy(&fpriv->bo_list_lock);
 
+	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
+		amdgpu_sem_destroy(fpriv, handle);
+	idr_destroy(&fpriv->sem_handles);
+
 	kfree(fpriv);
 	file_priv->driver_priv = NULL;
 
@@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
new file mode 100644
index 0000000..94a637f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chunming Zhou <david1.zhou@amd.com>
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/anon_inodes.h>
+#include <linux/sync_file.h>
+#include "amdgpu.h"
+#include <drm/drmP.h>
+
+static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file;
+
+	spin_lock(&fpriv->sem_handles_lock);
+
+	/* Check if we currently have a reference on the object */
+	sync_file = idr_find(&fpriv->sem_handles, handle);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	return sync_file;
+}
+
+static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_alloc();
+	int ret;
+
+	if (!sync_file)
+		return -ENOMEM;
+
+	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
+
+	/* we get a file reference and we use that in the idr. */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	if (ret < 0)
+		return ret;
+
+	*handle = ret;
+	return 0;
+}
+
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return;
+
+	spin_lock(&fpriv->sem_handles_lock);
+	idr_remove(&fpriv->sem_handles, handle);
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	fput(sync_file->file);
+}
+
+
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence)
+{
+	struct sync_file *sync_file;
+	struct dma_fence *old_fence;
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	old_fence = sync_file_replace_fence(sync_file, fence);
+	dma_fence_put(old_fence);
+	return 0;
+}
+
+static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
+				       int fd, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_fdget(fd);
+	int ret;
+
+	if (!sync_file)
+		return -EINVAL;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	if (ret < 0)
+		goto err_out;
+
+	*handle = ret;
+	return 0;
+err_out:
+	return ret;
+
+}
+
+static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
+			     u32 handle, int *fd)
+{
+	struct sync_file *sync_file;
+	int ret;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	ret = get_unused_fd_flags(O_CLOEXEC);
+	if (ret < 0)
+		goto err_put_file;
+
+	fd_install(ret, sync_file->file);
+
+	*fd = ret;
+	return 0;
+err_put_file:
+	return ret;
+}
+
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle)
+{
+	int r;
+	struct sync_file *sync_file;
+	struct dma_fence *fence;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	fence = sync_file_replace_fence(sync_file, NULL);
+	r = amdgpu_sync_fence(adev, sync, fence);
+	dma_fence_put(fence);
+
+	return r;
+}
+
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp)
+{
+	union drm_amdgpu_sem *args = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	int r = 0;
+
+	switch (args->in.op) {
+	case AMDGPU_SEM_OP_CREATE_SEM:
+		r = amdgpu_sem_create(fpriv, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_IMPORT_SEM:
+		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_EXPORT_SEM:
+		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
+		break;
+	case AMDGPU_SEM_OP_DESTROY_SEM:
+		amdgpu_sem_destroy(fpriv, args->in.handle);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 5797283..646b103 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -51,6 +51,7 @@ extern "C" {
 #define DRM_AMDGPU_GEM_OP		0x10
 #define DRM_AMDGPU_GEM_USERPTR		0x11
 #define DRM_AMDGPU_WAIT_FENCES		0x12
+#define DRM_AMDGPU_SEM                  0x13
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -65,6 +66,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
 	struct drm_amdgpu_wait_fences_out out;
 };
 
+#define AMDGPU_SEM_OP_CREATE_SEM 0
+#define AMDGPU_SEM_OP_IMPORT_SEM 1
+#define AMDGPU_SEM_OP_EXPORT_SEM 2
+#define AMDGPU_SEM_OP_DESTROY_SEM 3
+
+struct drm_amdgpu_sem_in {
+	__u32 op;
+	__u32 handle;
+};
+
+struct drm_amdgpu_sem_out {
+	__u32 fd;
+	__u32 handle;
+};
+
+union drm_amdgpu_sem {
+	struct drm_amdgpu_sem_in in;
+	struct drm_amdgpu_sem_out out;
+};
+
 #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
 #define AMDGPU_GEM_OP_SET_PLACEMENT		1
 
@@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_IB		0x01
 #define AMDGPU_CHUNK_ID_FENCE		0x02
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
+#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
+#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
@@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
 	__u32 offset;
 };
 
+struct drm_amdgpu_cs_chunk_sem {
+	__u32 handle;
+};
+
 struct drm_amdgpu_cs_chunk_data {
 	union {
 		struct drm_amdgpu_cs_chunk_ib		ib_data;
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] amdgpu: use sync file for shared semaphores
       [not found]     ` <20170314005054.7487-6-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-15  9:01       ` Daniel Vetter
       [not found]         ` <20170315090129.fneo5gafrz2jec32-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
  0 siblings, 1 reply; 14+ messages in thread
From: Daniel Vetter @ 2017-03-15  9:01 UTC (permalink / raw)
  To: Dave Airlie
  Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Tue, Mar 14, 2017 at 10:50:54AM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
> 
> This creates a new interface for amdgpu with ioctls to
> create/destroy/import and export shared semaphores using
> sem object backed by the sync_file code. The semaphores
> are not installed as fd (except for export), but rather
> like other driver internal objects in an idr. The idr
> holds the initial reference on the sync file.
> 
> The command submission interface is enhanced with two new
> chunks, one for semaphore waiting, one for semaphore signalling
> and just takes a list of handles for each.
> 
> This is based on work originally done by David Zhou at AMD,
> with input from Christian Konig on what things should look like.
> 
> NOTE: this interface addition needs a version bump to expose
> it to userspace.
> 
> Signed-off-by: Dave Airlie <airlied@redhat.com>

Another uapi corner case: Since you allow semaphores to be created before
they have a first fence attached, that essentially creates future fences.
I.e. sync_file fd with no fence yet attached. We want that anyway, but
maybe not together with semaphores (since there's some more implications).

I think it'd be good to attach a dummy fence which already starts out as
signalled to sync_file->fence for semaphores. That way userspace can't go
evil, create a semaphore, then pass it to a driver which then promptly
oopses or worse because it assumes then when it has a sync_file, it also
has a fence. And the dummy fence could be globally shared, so not really
overhead.
-Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
>  include/uapi/drm/amdgpu_drm.h           |  28 +++++
>  6 files changed, 322 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 2814aad..404bcba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>  	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>  	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>  	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
> -	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
> +	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
>  
>  # add asic specific block
>  amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index c1b9135..4ed8811 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -702,6 +702,8 @@ struct amdgpu_fpriv {
>  	struct mutex		bo_list_lock;
>  	struct idr		bo_list_handles;
>  	struct amdgpu_ctx_mgr	ctx_mgr;
> +	spinlock_t		sem_handles_lock;
> +	struct idr		sem_handles;
>  };
>  
>  /*
> @@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>  		       uint64_t addr, struct amdgpu_bo **bo);
>  int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>  
> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> +		     struct drm_file *filp);
> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> +				 uint32_t handle,
> +				 struct dma_fence *fence);
> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> +			       struct amdgpu_fpriv *fpriv,
> +			       struct amdgpu_sync *sync,
> +			       uint32_t handle);
>  #include "amdgpu_object.h"
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 4671432..80fc94b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>  			break;
>  
>  		case AMDGPU_CHUNK_ID_DEPENDENCIES:
> +		case AMDGPU_CHUNK_ID_SEM_WAIT:
> +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
>  			break;
>  
>  		default:
> @@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
>  	return 0;
>  }
>  
> +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
> +				       struct amdgpu_cs_parser *p,
> +				       struct amdgpu_cs_chunk *chunk)
> +{
> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> +	unsigned num_deps;
> +	int i, r;
> +	struct drm_amdgpu_cs_chunk_sem *deps;
> +
> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> +	num_deps = chunk->length_dw * 4 /
> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> +
> +	for (i = 0; i < num_deps; ++i) {
> +		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
> +					       deps[i].handle);
> +		if (r)
> +			return r;
> +	}
> +	return 0;
> +}
> +
>  static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>  				  struct amdgpu_cs_parser *p)
>  {
> @@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>  			r = amdgpu_process_fence_dep(adev, p, chunk);
>  			if (r)
>  				return r;
> +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
> +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
> +			if (r)
> +				return r;
>  		}
>  	}
>  
>  	return 0;
>  }
>  
> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
> +					 struct amdgpu_cs_chunk *chunk,
> +					 struct dma_fence *fence)
> +{
> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> +	unsigned num_deps;
> +	int i, r;
> +	struct drm_amdgpu_cs_chunk_sem *deps;
> +
> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> +	num_deps = chunk->length_dw * 4 /
> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> +
> +	for (i = 0; i < num_deps; ++i) {
> +		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
> +						 fence);
> +		if (r)
> +			return r;
> +	}
> +	return 0;
> +}
> +
> +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
> +{
> +	int i, r;
> +
> +	for (i = 0; i < p->nchunks; ++i) {
> +		struct amdgpu_cs_chunk *chunk;
> +
> +		chunk = &p->chunks[i];
> +
> +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
> +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
> +			if (r)
> +				return r;
> +		}
> +	}
> +	return 0;
> +}
> +
>  static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  			    union drm_amdgpu_cs *cs)
>  {
> @@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>  	trace_amdgpu_cs_ioctl(job);
>  	amd_sched_entity_push_job(&job->base);
>  
> -	return 0;
> +	return amdgpu_cs_post_dependencies(p);
>  }
>  
>  int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 61d94c7..013aed1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>  	mutex_init(&fpriv->bo_list_lock);
>  	idr_init(&fpriv->bo_list_handles);
>  
> +	spin_lock_init(&fpriv->sem_handles_lock);
> +	idr_init(&fpriv->sem_handles);
>  	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>  
>  	file_priv->driver_priv = fpriv;
> @@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  	struct amdgpu_device *adev = dev->dev_private;
>  	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>  	struct amdgpu_bo_list *list;
> +	struct amdgpu_sem *sem;
>  	int handle;
>  
>  	if (!fpriv)
> @@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>  	idr_destroy(&fpriv->bo_list_handles);
>  	mutex_destroy(&fpriv->bo_list_lock);
>  
> +	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
> +		amdgpu_sem_destroy(fpriv, handle);
> +	idr_destroy(&fpriv->sem_handles);
> +
>  	kfree(fpriv);
>  	file_priv->driver_priv = NULL;
>  
> @@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>  	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>  	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>  };
>  const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> new file mode 100644
> index 0000000..066520a
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> @@ -0,0 +1,204 @@
> +/*
> + * Copyright 2016 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Chunming Zhou <david1.zhou@amd.com>
> + */
> +#include <linux/file.h>
> +#include <linux/fs.h>
> +#include <linux/kernel.h>
> +#include <linux/poll.h>
> +#include <linux/seq_file.h>
> +#include <linux/export.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/uaccess.h>
> +#include <linux/anon_inodes.h>
> +#include <linux/sync_file.h>
> +#include "amdgpu.h"
> +#include <drm/drmP.h>
> +
> +static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
> +{
> +	struct sync_file *sync_file;
> +
> +	spin_lock(&fpriv->sem_handles_lock);
> +
> +	/* Check if we currently have a reference on the object */
> +	sync_file = idr_find(&fpriv->sem_handles, handle);
> +
> +	spin_unlock(&fpriv->sem_handles_lock);
> +
> +	return sync_file;
> +}
> +
> +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
> +{
> +	struct sync_file *sync_file = sync_file_alloc();
> +	int ret;
> +
> +	if (!sync_file)
> +		return -ENOMEM;
> +
> +	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
> +
> +	/* we get a file reference and we use that in the idr. */
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&fpriv->sem_handles_lock);
> +
> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> +
> +	spin_unlock(&fpriv->sem_handles_lock);
> +	idr_preload_end();
> +
> +	if (ret < 0)
> +		return ret;
> +
> +	*handle = ret;
> +	return 0;
> +}
> +
> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
> +{
> +	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return;
> +
> +	spin_lock(&fpriv->sem_handles_lock);
> +	idr_remove(&fpriv->sem_handles, handle);
> +	spin_unlock(&fpriv->sem_handles_lock);
> +
> +	fput(sync_file->file);
> +}
> +
> +
> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> +				 uint32_t handle,
> +				 struct dma_fence *fence)
> +{
> +	struct sync_file *sync_file;
> +	struct dma_fence *old_fence;
> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	old_fence = sync_file_replace_fence(sync_file, fence);
> +	dma_fence_put(old_fence);
> +	return 0;
> +}
> +
> +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
> +				       int fd, u32 *handle)
> +{
> +	struct sync_file *sync_file = sync_file_fdget(fd);
> +	int ret;
> +
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	idr_preload(GFP_KERNEL);
> +	spin_lock(&fpriv->sem_handles_lock);
> +
> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> +
> +	spin_unlock(&fpriv->sem_handles_lock);
> +	idr_preload_end();
> +
> +	fput(sync_file->file);
> +	if (ret < 0)
> +		goto err_out;
> +
> +	*handle = ret;
> +	return 0;
> +err_out:
> +	return ret;
> +
> +}
> +
> +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
> +			     u32 handle, int *fd)
> +{
> +	struct sync_file *sync_file;
> +	int ret;
> +
> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	ret = get_unused_fd_flags(O_CLOEXEC);
> +	if (ret < 0)
> +		goto err_put_file;
> +
> +	fd_install(ret, sync_file->file);
> +
> +	*fd = ret;
> +	return 0;
> +err_put_file:
> +	return ret;
> +}
> +
> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> +			       struct amdgpu_fpriv *fpriv,
> +			       struct amdgpu_sync *sync,
> +			       uint32_t handle)
> +{
> +	int r;
> +	struct sync_file *sync_file;
> +	struct dma_fence *fence;
> +
> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> +	if (!sync_file)
> +		return -EINVAL;
> +
> +	fence = sync_file_replace_fence(sync_file, NULL);
> +	r = amdgpu_sync_fence(adev, sync, fence);
> +	dma_fence_put(fence);
> +
> +	return r;
> +}
> +
> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> +		     struct drm_file *filp)
> +{
> +	union drm_amdgpu_sem *args = data;
> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> +	int r = 0;
> +
> +	switch (args->in.op) {
> +	case AMDGPU_SEM_OP_CREATE_SEM:
> +		r = amdgpu_sem_create(fpriv, &args->out.handle);
> +		break;
> +	case AMDGPU_SEM_OP_IMPORT_SEM:
> +		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
> +		break;
> +	case AMDGPU_SEM_OP_EXPORT_SEM:
> +		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
> +		break;
> +	case AMDGPU_SEM_OP_DESTROY_SEM:
> +		amdgpu_sem_destroy(fpriv, args->in.handle);
> +		break;
> +	default:
> +		r = -EINVAL;
> +		break;
> +	}
> +
> +	return r;
> +}
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 5797283..646b103 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -51,6 +51,7 @@ extern "C" {
>  #define DRM_AMDGPU_GEM_OP		0x10
>  #define DRM_AMDGPU_GEM_USERPTR		0x11
>  #define DRM_AMDGPU_WAIT_FENCES		0x12
> +#define DRM_AMDGPU_SEM                  0x13
>  
>  #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>  #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> @@ -65,6 +66,7 @@ extern "C" {
>  #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
>  #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
>  #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
> +#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
>  
>  #define AMDGPU_GEM_DOMAIN_CPU		0x1
>  #define AMDGPU_GEM_DOMAIN_GTT		0x2
> @@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
>  	struct drm_amdgpu_wait_fences_out out;
>  };
>  
> +#define AMDGPU_SEM_OP_CREATE_SEM 0
> +#define AMDGPU_SEM_OP_IMPORT_SEM 1
> +#define AMDGPU_SEM_OP_EXPORT_SEM 2
> +#define AMDGPU_SEM_OP_DESTROY_SEM 3
> +
> +struct drm_amdgpu_sem_in {
> +	__u32 op;
> +	__u32 handle;
> +};
> +
> +struct drm_amdgpu_sem_out {
> +	__u32 fd;
> +	__u32 handle;
> +};
> +
> +union drm_amdgpu_sem {
> +	struct drm_amdgpu_sem_in in;
> +	struct drm_amdgpu_sem_out out;
> +};
> +
>  #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
>  #define AMDGPU_GEM_OP_SET_PLACEMENT		1
>  
> @@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
>  #define AMDGPU_CHUNK_ID_IB		0x01
>  #define AMDGPU_CHUNK_ID_FENCE		0x02
>  #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
> +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
> +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
>  
>  struct drm_amdgpu_cs_chunk {
>  	__u32		chunk_id;
> @@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
>  	__u32 offset;
>  };
>  
> +struct drm_amdgpu_cs_chunk_sem {
> +	__u32 handle;
> +};
> +
>  struct drm_amdgpu_cs_chunk_data {
>  	union {
>  		struct drm_amdgpu_cs_chunk_ib		ib_data;
> -- 
> 2.7.4
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] amdgpu: use sync file for shared semaphores
       [not found]         ` <20170315090129.fneo5gafrz2jec32-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
@ 2017-03-15  9:43           ` Christian König
  2017-03-15  9:57             ` Daniel Vetter
  0 siblings, 1 reply; 14+ messages in thread
From: Christian König @ 2017-03-15  9:43 UTC (permalink / raw)
  To: Daniel Vetter, Dave Airlie
  Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 15.03.2017 um 10:01 schrieb Daniel Vetter:
> On Tue, Mar 14, 2017 at 10:50:54AM +1000, Dave Airlie wrote:
>> From: Dave Airlie <airlied@redhat.com>
>>
>> This creates a new interface for amdgpu with ioctls to
>> create/destroy/import and export shared semaphores using
>> sem object backed by the sync_file code. The semaphores
>> are not installed as fd (except for export), but rather
>> like other driver internal objects in an idr. The idr
>> holds the initial reference on the sync file.
>>
>> The command submission interface is enhanced with two new
>> chunks, one for semaphore waiting, one for semaphore signalling
>> and just takes a list of handles for each.
>>
>> This is based on work originally done by David Zhou at AMD,
>> with input from Christian Konig on what things should look like.
>>
>> NOTE: this interface addition needs a version bump to expose
>> it to userspace.
>>
>> Signed-off-by: Dave Airlie <airlied@redhat.com>
> Another uapi corner case: Since you allow semaphores to be created before
> they have a first fence attached, that essentially creates future fences.
> I.e. sync_file fd with no fence yet attached. We want that anyway, but
> maybe not together with semaphores (since there's some more implications).
>
> I think it'd be good to attach a dummy fence which already starts out as
> signalled to sync_file->fence for semaphores. That way userspace can't go
> evil, create a semaphore, then pass it to a driver which then promptly
> oopses or worse because it assumes then when it has a sync_file, it also
> has a fence. And the dummy fence could be globally shared, so not really
> overhead.

Sounds fishy to me, what happens in case of a race condition and the 
receiver sometimes waits on the dummy and sometimes on the real fence?

I would rather teach the users of sync_file to return a proper error 
code when you want to wait on a sync_file which doesn't have a fence yet.

Christian.

> -Daniel
>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
>>   include/uapi/drm/amdgpu_drm.h           |  28 +++++
>>   6 files changed, 322 insertions(+), 2 deletions(-)
>>   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index 2814aad..404bcba 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>>   	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
>>   	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>>   	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>> -	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
>> +	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
>>   
>>   # add asic specific block
>>   amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index c1b9135..4ed8811 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -702,6 +702,8 @@ struct amdgpu_fpriv {
>>   	struct mutex		bo_list_lock;
>>   	struct idr		bo_list_handles;
>>   	struct amdgpu_ctx_mgr	ctx_mgr;
>> +	spinlock_t		sem_handles_lock;
>> +	struct idr		sem_handles;
>>   };
>>   
>>   /*
>> @@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
>>   		       uint64_t addr, struct amdgpu_bo **bo);
>>   int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
>>   
>> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
>> +		     struct drm_file *filp);
>> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
>> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
>> +				 uint32_t handle,
>> +				 struct dma_fence *fence);
>> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
>> +			       struct amdgpu_fpriv *fpriv,
>> +			       struct amdgpu_sync *sync,
>> +			       uint32_t handle);
>>   #include "amdgpu_object.h"
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 4671432..80fc94b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>>   			break;
>>   
>>   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
>> +		case AMDGPU_CHUNK_ID_SEM_WAIT:
>> +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
>>   			break;
>>   
>>   		default:
>> @@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
>>   	return 0;
>>   }
>>   
>> +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
>> +				       struct amdgpu_cs_parser *p,
>> +				       struct amdgpu_cs_chunk *chunk)
>> +{
>> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> +	unsigned num_deps;
>> +	int i, r;
>> +	struct drm_amdgpu_cs_chunk_sem *deps;
>> +
>> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>> +	num_deps = chunk->length_dw * 4 /
>> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
>> +
>> +	for (i = 0; i < num_deps; ++i) {
>> +		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
>> +					       deps[i].handle);
>> +		if (r)
>> +			return r;
>> +	}
>> +	return 0;
>> +}
>> +
>>   static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>   				  struct amdgpu_cs_parser *p)
>>   {
>> @@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
>>   			r = amdgpu_process_fence_dep(adev, p, chunk);
>>   			if (r)
>>   				return r;
>> +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
>> +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
>> +			if (r)
>> +				return r;
>>   		}
>>   	}
>>   
>>   	return 0;
>>   }
>>   
>> +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
>> +					 struct amdgpu_cs_chunk *chunk,
>> +					 struct dma_fence *fence)
>> +{
>> +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> +	unsigned num_deps;
>> +	int i, r;
>> +	struct drm_amdgpu_cs_chunk_sem *deps;
>> +
>> +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
>> +	num_deps = chunk->length_dw * 4 /
>> +		sizeof(struct drm_amdgpu_cs_chunk_sem);
>> +
>> +	for (i = 0; i < num_deps; ++i) {
>> +		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
>> +						 fence);
>> +		if (r)
>> +			return r;
>> +	}
>> +	return 0;
>> +}
>> +
>> +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
>> +{
>> +	int i, r;
>> +
>> +	for (i = 0; i < p->nchunks; ++i) {
>> +		struct amdgpu_cs_chunk *chunk;
>> +
>> +		chunk = &p->chunks[i];
>> +
>> +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
>> +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
>> +			if (r)
>> +				return r;
>> +		}
>> +	}
>> +	return 0;
>> +}
>> +
>>   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>   			    union drm_amdgpu_cs *cs)
>>   {
>> @@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>>   	trace_amdgpu_cs_ioctl(job);
>>   	amd_sched_entity_push_job(&job->base);
>>   
>> -	return 0;
>> +	return amdgpu_cs_post_dependencies(p);
>>   }
>>   
>>   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index 61d94c7..013aed1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>>   	mutex_init(&fpriv->bo_list_lock);
>>   	idr_init(&fpriv->bo_list_handles);
>>   
>> +	spin_lock_init(&fpriv->sem_handles_lock);
>> +	idr_init(&fpriv->sem_handles);
>>   	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
>>   
>>   	file_priv->driver_priv = fpriv;
>> @@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   	struct amdgpu_device *adev = dev->dev_private;
>>   	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
>>   	struct amdgpu_bo_list *list;
>> +	struct amdgpu_sem *sem;
>>   	int handle;
>>   
>>   	if (!fpriv)
>> @@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
>>   	idr_destroy(&fpriv->bo_list_handles);
>>   	mutex_destroy(&fpriv->bo_list_lock);
>>   
>> +	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
>> +		amdgpu_sem_destroy(fpriv, handle);
>> +	idr_destroy(&fpriv->sem_handles);
>> +
>>   	kfree(fpriv);
>>   	file_priv->driver_priv = NULL;
>>   
>> @@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>>   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>> +	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>   };
>>   const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
>> new file mode 100644
>> index 0000000..066520a
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
>> @@ -0,0 +1,204 @@
>> +/*
>> + * Copyright 2016 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Chunming Zhou <david1.zhou@amd.com>
>> + */
>> +#include <linux/file.h>
>> +#include <linux/fs.h>
>> +#include <linux/kernel.h>
>> +#include <linux/poll.h>
>> +#include <linux/seq_file.h>
>> +#include <linux/export.h>
>> +#include <linux/sched.h>
>> +#include <linux/slab.h>
>> +#include <linux/uaccess.h>
>> +#include <linux/anon_inodes.h>
>> +#include <linux/sync_file.h>
>> +#include "amdgpu.h"
>> +#include <drm/drmP.h>
>> +
>> +static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
>> +{
>> +	struct sync_file *sync_file;
>> +
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +
>> +	/* Check if we currently have a reference on the object */
>> +	sync_file = idr_find(&fpriv->sem_handles, handle);
>> +
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +
>> +	return sync_file;
>> +}
>> +
>> +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
>> +{
>> +	struct sync_file *sync_file = sync_file_alloc();
>> +	int ret;
>> +
>> +	if (!sync_file)
>> +		return -ENOMEM;
>> +
>> +	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
>> +
>> +	/* we get a file reference and we use that in the idr. */
>> +	idr_preload(GFP_KERNEL);
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +
>> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
>> +
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +	idr_preload_end();
>> +
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	*handle = ret;
>> +	return 0;
>> +}
>> +
>> +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
>> +{
>> +	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return;
>> +
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +	idr_remove(&fpriv->sem_handles, handle);
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +
>> +	fput(sync_file->file);
>> +}
>> +
>> +
>> +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
>> +				 uint32_t handle,
>> +				 struct dma_fence *fence)
>> +{
>> +	struct sync_file *sync_file;
>> +	struct dma_fence *old_fence;
>> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	old_fence = sync_file_replace_fence(sync_file, fence);
>> +	dma_fence_put(old_fence);
>> +	return 0;
>> +}
>> +
>> +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
>> +				       int fd, u32 *handle)
>> +{
>> +	struct sync_file *sync_file = sync_file_fdget(fd);
>> +	int ret;
>> +
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	idr_preload(GFP_KERNEL);
>> +	spin_lock(&fpriv->sem_handles_lock);
>> +
>> +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
>> +
>> +	spin_unlock(&fpriv->sem_handles_lock);
>> +	idr_preload_end();
>> +
>> +	fput(sync_file->file);
>> +	if (ret < 0)
>> +		goto err_out;
>> +
>> +	*handle = ret;
>> +	return 0;
>> +err_out:
>> +	return ret;
>> +
>> +}
>> +
>> +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
>> +			     u32 handle, int *fd)
>> +{
>> +	struct sync_file *sync_file;
>> +	int ret;
>> +
>> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	ret = get_unused_fd_flags(O_CLOEXEC);
>> +	if (ret < 0)
>> +		goto err_put_file;
>> +
>> +	fd_install(ret, sync_file->file);
>> +
>> +	*fd = ret;
>> +	return 0;
>> +err_put_file:
>> +	return ret;
>> +}
>> +
>> +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
>> +			       struct amdgpu_fpriv *fpriv,
>> +			       struct amdgpu_sync *sync,
>> +			       uint32_t handle)
>> +{
>> +	int r;
>> +	struct sync_file *sync_file;
>> +	struct dma_fence *fence;
>> +
>> +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
>> +	if (!sync_file)
>> +		return -EINVAL;
>> +
>> +	fence = sync_file_replace_fence(sync_file, NULL);
>> +	r = amdgpu_sync_fence(adev, sync, fence);
>> +	dma_fence_put(fence);
>> +
>> +	return r;
>> +}
>> +
>> +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
>> +		     struct drm_file *filp)
>> +{
>> +	union drm_amdgpu_sem *args = data;
>> +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> +	int r = 0;
>> +
>> +	switch (args->in.op) {
>> +	case AMDGPU_SEM_OP_CREATE_SEM:
>> +		r = amdgpu_sem_create(fpriv, &args->out.handle);
>> +		break;
>> +	case AMDGPU_SEM_OP_IMPORT_SEM:
>> +		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
>> +		break;
>> +	case AMDGPU_SEM_OP_EXPORT_SEM:
>> +		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
>> +		break;
>> +	case AMDGPU_SEM_OP_DESTROY_SEM:
>> +		amdgpu_sem_destroy(fpriv, args->in.handle);
>> +		break;
>> +	default:
>> +		r = -EINVAL;
>> +		break;
>> +	}
>> +
>> +	return r;
>> +}
>> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
>> index 5797283..646b103 100644
>> --- a/include/uapi/drm/amdgpu_drm.h
>> +++ b/include/uapi/drm/amdgpu_drm.h
>> @@ -51,6 +51,7 @@ extern "C" {
>>   #define DRM_AMDGPU_GEM_OP		0x10
>>   #define DRM_AMDGPU_GEM_USERPTR		0x11
>>   #define DRM_AMDGPU_WAIT_FENCES		0x12
>> +#define DRM_AMDGPU_SEM                  0x13
>>   
>>   #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>>   #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>> @@ -65,6 +66,7 @@ extern "C" {
>>   #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
>>   #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
>>   #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
>> +#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
>>   
>>   #define AMDGPU_GEM_DOMAIN_CPU		0x1
>>   #define AMDGPU_GEM_DOMAIN_GTT		0x2
>> @@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
>>   	struct drm_amdgpu_wait_fences_out out;
>>   };
>>   
>> +#define AMDGPU_SEM_OP_CREATE_SEM 0
>> +#define AMDGPU_SEM_OP_IMPORT_SEM 1
>> +#define AMDGPU_SEM_OP_EXPORT_SEM 2
>> +#define AMDGPU_SEM_OP_DESTROY_SEM 3
>> +
>> +struct drm_amdgpu_sem_in {
>> +	__u32 op;
>> +	__u32 handle;
>> +};
>> +
>> +struct drm_amdgpu_sem_out {
>> +	__u32 fd;
>> +	__u32 handle;
>> +};
>> +
>> +union drm_amdgpu_sem {
>> +	struct drm_amdgpu_sem_in in;
>> +	struct drm_amdgpu_sem_out out;
>> +};
>> +
>>   #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
>>   #define AMDGPU_GEM_OP_SET_PLACEMENT		1
>>   
>> @@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
>>   #define AMDGPU_CHUNK_ID_IB		0x01
>>   #define AMDGPU_CHUNK_ID_FENCE		0x02
>>   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
>> +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
>> +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
>>   
>>   struct drm_amdgpu_cs_chunk {
>>   	__u32		chunk_id;
>> @@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
>>   	__u32 offset;
>>   };
>>   
>> +struct drm_amdgpu_cs_chunk_sem {
>> +	__u32 handle;
>> +};
>> +
>>   struct drm_amdgpu_cs_chunk_data {
>>   	union {
>>   		struct drm_amdgpu_cs_chunk_ib		ib_data;
>> -- 
>> 2.7.4
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 4/4] amdgpu: use sync file for shared semaphores
  2017-03-15  9:43           ` Christian König
@ 2017-03-15  9:57             ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2017-03-15  9:57 UTC (permalink / raw)
  To: Christian König; +Cc: amd-gfx, dri-devel

On Wed, Mar 15, 2017 at 10:43:01AM +0100, Christian König wrote:
> Am 15.03.2017 um 10:01 schrieb Daniel Vetter:
> > On Tue, Mar 14, 2017 at 10:50:54AM +1000, Dave Airlie wrote:
> > > From: Dave Airlie <airlied@redhat.com>
> > > 
> > > This creates a new interface for amdgpu with ioctls to
> > > create/destroy/import and export shared semaphores using
> > > sem object backed by the sync_file code. The semaphores
> > > are not installed as fd (except for export), but rather
> > > like other driver internal objects in an idr. The idr
> > > holds the initial reference on the sync file.
> > > 
> > > The command submission interface is enhanced with two new
> > > chunks, one for semaphore waiting, one for semaphore signalling
> > > and just takes a list of handles for each.
> > > 
> > > This is based on work originally done by David Zhou at AMD,
> > > with input from Christian Konig on what things should look like.
> > > 
> > > NOTE: this interface addition needs a version bump to expose
> > > it to userspace.
> > > 
> > > Signed-off-by: Dave Airlie <airlied@redhat.com>
> > Another uapi corner case: Since you allow semaphores to be created before
> > they have a first fence attached, that essentially creates future fences.
> > I.e. sync_file fd with no fence yet attached. We want that anyway, but
> > maybe not together with semaphores (since there's some more implications).
> > 
> > I think it'd be good to attach a dummy fence which already starts out as
> > signalled to sync_file->fence for semaphores. That way userspace can't go
> > evil, create a semaphore, then pass it to a driver which then promptly
> > oopses or worse because it assumes then when it has a sync_file, it also
> > has a fence. And the dummy fence could be globally shared, so not really
> > overhead.
> 
> Sounds fishy to me, what happens in case of a race condition and the
> receiver sometimes waits on the dummy and sometimes on the real fence?
> 
> I would rather teach the users of sync_file to return a proper error code
> when you want to wait on a sync_file which doesn't have a fence yet.

Races in userspace are races in userspace :-) And it's only a problem
initially when you use a semaphore for the first time. After that you
still have the same race, but because there's always a fence attached,
your userspace won't ever notice the fail. It will simply complete
immediately (because most likely the old fence has completed already).

And it also doesn't mesh with the rough future fence plan, where the idea
is that sync_file_get_fence blocks until the fence is there, and only
drivers who can handle the risk of a fence loop (through hangcheck and
hang recovery) will use a __get_fence function to peek at the future
fence. I think the assumption that a sync_file always comes with a fence
is a good principle for code robustness in the kernel.

If you really want the userspace debugging, we can make it a module
option, or even better, sprinkle a pile of tracepoints all over fences to
make it easier to watch. But when the tradeoff is between userspace
debugging and kernel robustness, I think we need to go with kernel
robustness, for security reasons and all that.
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 204 ++++++++++++++++++++++++++++++++
> > >   include/uapi/drm/amdgpu_drm.h           |  28 +++++
> > >   6 files changed, 322 insertions(+), 2 deletions(-)
> > >   create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
> > > index 2814aad..404bcba 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> > > +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> > > @@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
> > >   	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
> > >   	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
> > >   	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
> > > -	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
> > > +	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
> > >   # add asic specific block
> > >   amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > index c1b9135..4ed8811 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > > @@ -702,6 +702,8 @@ struct amdgpu_fpriv {
> > >   	struct mutex		bo_list_lock;
> > >   	struct idr		bo_list_handles;
> > >   	struct amdgpu_ctx_mgr	ctx_mgr;
> > > +	spinlock_t		sem_handles_lock;
> > > +	struct idr		sem_handles;
> > >   };
> > >   /*
> > > @@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
> > >   		       uint64_t addr, struct amdgpu_bo **bo);
> > >   int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
> > > +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> > > +		     struct drm_file *filp);
> > > +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
> > > +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> > > +				 uint32_t handle,
> > > +				 struct dma_fence *fence);
> > > +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> > > +			       struct amdgpu_fpriv *fpriv,
> > > +			       struct amdgpu_sync *sync,
> > > +			       uint32_t handle);
> > >   #include "amdgpu_object.h"
> > >   #endif
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > index 4671432..80fc94b 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > > @@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
> > >   			break;
> > >   		case AMDGPU_CHUNK_ID_DEPENDENCIES:
> > > +		case AMDGPU_CHUNK_ID_SEM_WAIT:
> > > +		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
> > >   			break;
> > >   		default:
> > > @@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
> > >   	return 0;
> > >   }
> > > +static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
> > > +				       struct amdgpu_cs_parser *p,
> > > +				       struct amdgpu_cs_chunk *chunk)
> > > +{
> > > +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> > > +	unsigned num_deps;
> > > +	int i, r;
> > > +	struct drm_amdgpu_cs_chunk_sem *deps;
> > > +
> > > +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> > > +	num_deps = chunk->length_dw * 4 /
> > > +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> > > +
> > > +	for (i = 0; i < num_deps; ++i) {
> > > +		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
> > > +					       deps[i].handle);
> > > +		if (r)
> > > +			return r;
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > >   static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
> > >   				  struct amdgpu_cs_parser *p)
> > >   {
> > > @@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
> > >   			r = amdgpu_process_fence_dep(adev, p, chunk);
> > >   			if (r)
> > >   				return r;
> > > +		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
> > > +			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
> > > +			if (r)
> > > +				return r;
> > >   		}
> > >   	}
> > >   	return 0;
> > >   }
> > > +static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
> > > +					 struct amdgpu_cs_chunk *chunk,
> > > +					 struct dma_fence *fence)
> > > +{
> > > +	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
> > > +	unsigned num_deps;
> > > +	int i, r;
> > > +	struct drm_amdgpu_cs_chunk_sem *deps;
> > > +
> > > +	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
> > > +	num_deps = chunk->length_dw * 4 /
> > > +		sizeof(struct drm_amdgpu_cs_chunk_sem);
> > > +
> > > +	for (i = 0; i < num_deps; ++i) {
> > > +		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
> > > +						 fence);
> > > +		if (r)
> > > +			return r;
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > > +static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
> > > +{
> > > +	int i, r;
> > > +
> > > +	for (i = 0; i < p->nchunks; ++i) {
> > > +		struct amdgpu_cs_chunk *chunk;
> > > +
> > > +		chunk = &p->chunks[i];
> > > +
> > > +		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
> > > +			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
> > > +			if (r)
> > > +				return r;
> > > +		}
> > > +	}
> > > +	return 0;
> > > +}
> > > +
> > >   static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> > >   			    union drm_amdgpu_cs *cs)
> > >   {
> > > @@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> > >   	trace_amdgpu_cs_ioctl(job);
> > >   	amd_sched_entity_push_job(&job->base);
> > > -	return 0;
> > > +	return amdgpu_cs_post_dependencies(p);
> > >   }
> > >   int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > index 61d94c7..013aed1 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > > @@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
> > >   	mutex_init(&fpriv->bo_list_lock);
> > >   	idr_init(&fpriv->bo_list_handles);
> > > +	spin_lock_init(&fpriv->sem_handles_lock);
> > > +	idr_init(&fpriv->sem_handles);
> > >   	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
> > >   	file_priv->driver_priv = fpriv;
> > > @@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
> > >   	struct amdgpu_device *adev = dev->dev_private;
> > >   	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
> > >   	struct amdgpu_bo_list *list;
> > > +	struct amdgpu_sem *sem;
> > >   	int handle;
> > >   	if (!fpriv)
> > > @@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
> > >   	idr_destroy(&fpriv->bo_list_handles);
> > >   	mutex_destroy(&fpriv->bo_list_lock);
> > > +	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
> > > +		amdgpu_sem_destroy(fpriv, handle);
> > > +	idr_destroy(&fpriv->sem_handles);
> > > +
> > >   	kfree(fpriv);
> > >   	file_priv->driver_priv = NULL;
> > > @@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
> > >   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > >   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > >   	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > > +	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> > >   };
> > >   const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> > > new file mode 100644
> > > index 0000000..066520a
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
> > > @@ -0,0 +1,204 @@
> > > +/*
> > > + * Copyright 2016 Advanced Micro Devices, Inc.
> > > + *
> > > + * Permission is hereby granted, free of charge, to any person obtaining a
> > > + * copy of this software and associated documentation files (the "Software"),
> > > + * to deal in the Software without restriction, including without limitation
> > > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > > + * and/or sell copies of the Software, and to permit persons to whom the
> > > + * Software is furnished to do so, subject to the following conditions:
> > > + *
> > > + * The above copyright notice and this permission notice shall be included in
> > > + * all copies or substantial portions of the Software.
> > > + *
> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
> > > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> > > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
> > > + * OTHER DEALINGS IN THE SOFTWARE.
> > > + *
> > > + * Authors:
> > > + *    Chunming Zhou <david1.zhou@amd.com>
> > > + */
> > > +#include <linux/file.h>
> > > +#include <linux/fs.h>
> > > +#include <linux/kernel.h>
> > > +#include <linux/poll.h>
> > > +#include <linux/seq_file.h>
> > > +#include <linux/export.h>
> > > +#include <linux/sched.h>
> > > +#include <linux/slab.h>
> > > +#include <linux/uaccess.h>
> > > +#include <linux/anon_inodes.h>
> > > +#include <linux/sync_file.h>
> > > +#include "amdgpu.h"
> > > +#include <drm/drmP.h>
> > > +
> > > +static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
> > > +{
> > > +	struct sync_file *sync_file;
> > > +
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +
> > > +	/* Check if we currently have a reference on the object */
> > > +	sync_file = idr_find(&fpriv->sem_handles, handle);
> > > +
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +
> > > +	return sync_file;
> > > +}
> > > +
> > > +static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
> > > +{
> > > +	struct sync_file *sync_file = sync_file_alloc();
> > > +	int ret;
> > > +
> > > +	if (!sync_file)
> > > +		return -ENOMEM;
> > > +
> > > +	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
> > > +
> > > +	/* we get a file reference and we use that in the idr. */
> > > +	idr_preload(GFP_KERNEL);
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +
> > > +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> > > +
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +	idr_preload_end();
> > > +
> > > +	if (ret < 0)
> > > +		return ret;
> > > +
> > > +	*handle = ret;
> > > +	return 0;
> > > +}
> > > +
> > > +void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
> > > +{
> > > +	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return;
> > > +
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +	idr_remove(&fpriv->sem_handles, handle);
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +
> > > +	fput(sync_file->file);
> > > +}
> > > +
> > > +
> > > +int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
> > > +				 uint32_t handle,
> > > +				 struct dma_fence *fence)
> > > +{
> > > +	struct sync_file *sync_file;
> > > +	struct dma_fence *old_fence;
> > > +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	old_fence = sync_file_replace_fence(sync_file, fence);
> > > +	dma_fence_put(old_fence);
> > > +	return 0;
> > > +}
> > > +
> > > +static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
> > > +				       int fd, u32 *handle)
> > > +{
> > > +	struct sync_file *sync_file = sync_file_fdget(fd);
> > > +	int ret;
> > > +
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	idr_preload(GFP_KERNEL);
> > > +	spin_lock(&fpriv->sem_handles_lock);
> > > +
> > > +	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
> > > +
> > > +	spin_unlock(&fpriv->sem_handles_lock);
> > > +	idr_preload_end();
> > > +
> > > +	fput(sync_file->file);
> > > +	if (ret < 0)
> > > +		goto err_out;
> > > +
> > > +	*handle = ret;
> > > +	return 0;
> > > +err_out:
> > > +	return ret;
> > > +
> > > +}
> > > +
> > > +static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
> > > +			     u32 handle, int *fd)
> > > +{
> > > +	struct sync_file *sync_file;
> > > +	int ret;
> > > +
> > > +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	ret = get_unused_fd_flags(O_CLOEXEC);
> > > +	if (ret < 0)
> > > +		goto err_put_file;
> > > +
> > > +	fd_install(ret, sync_file->file);
> > > +
> > > +	*fd = ret;
> > > +	return 0;
> > > +err_put_file:
> > > +	return ret;
> > > +}
> > > +
> > > +int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
> > > +			       struct amdgpu_fpriv *fpriv,
> > > +			       struct amdgpu_sync *sync,
> > > +			       uint32_t handle)
> > > +{
> > > +	int r;
> > > +	struct sync_file *sync_file;
> > > +	struct dma_fence *fence;
> > > +
> > > +	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
> > > +	if (!sync_file)
> > > +		return -EINVAL;
> > > +
> > > +	fence = sync_file_replace_fence(sync_file, NULL);
> > > +	r = amdgpu_sync_fence(adev, sync, fence);
> > > +	dma_fence_put(fence);
> > > +
> > > +	return r;
> > > +}
> > > +
> > > +int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
> > > +		     struct drm_file *filp)
> > > +{
> > > +	union drm_amdgpu_sem *args = data;
> > > +	struct amdgpu_fpriv *fpriv = filp->driver_priv;
> > > +	int r = 0;
> > > +
> > > +	switch (args->in.op) {
> > > +	case AMDGPU_SEM_OP_CREATE_SEM:
> > > +		r = amdgpu_sem_create(fpriv, &args->out.handle);
> > > +		break;
> > > +	case AMDGPU_SEM_OP_IMPORT_SEM:
> > > +		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
> > > +		break;
> > > +	case AMDGPU_SEM_OP_EXPORT_SEM:
> > > +		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
> > > +		break;
> > > +	case AMDGPU_SEM_OP_DESTROY_SEM:
> > > +		amdgpu_sem_destroy(fpriv, args->in.handle);
> > > +		break;
> > > +	default:
> > > +		r = -EINVAL;
> > > +		break;
> > > +	}
> > > +
> > > +	return r;
> > > +}
> > > diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> > > index 5797283..646b103 100644
> > > --- a/include/uapi/drm/amdgpu_drm.h
> > > +++ b/include/uapi/drm/amdgpu_drm.h
> > > @@ -51,6 +51,7 @@ extern "C" {
> > >   #define DRM_AMDGPU_GEM_OP		0x10
> > >   #define DRM_AMDGPU_GEM_USERPTR		0x11
> > >   #define DRM_AMDGPU_WAIT_FENCES		0x12
> > > +#define DRM_AMDGPU_SEM                  0x13
> > >   #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
> > >   #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
> > > @@ -65,6 +66,7 @@ extern "C" {
> > >   #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
> > >   #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
> > >   #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
> > > +#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
> > >   #define AMDGPU_GEM_DOMAIN_CPU		0x1
> > >   #define AMDGPU_GEM_DOMAIN_GTT		0x2
> > > @@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
> > >   	struct drm_amdgpu_wait_fences_out out;
> > >   };
> > > +#define AMDGPU_SEM_OP_CREATE_SEM 0
> > > +#define AMDGPU_SEM_OP_IMPORT_SEM 1
> > > +#define AMDGPU_SEM_OP_EXPORT_SEM 2
> > > +#define AMDGPU_SEM_OP_DESTROY_SEM 3
> > > +
> > > +struct drm_amdgpu_sem_in {
> > > +	__u32 op;
> > > +	__u32 handle;
> > > +};
> > > +
> > > +struct drm_amdgpu_sem_out {
> > > +	__u32 fd;
> > > +	__u32 handle;
> > > +};
> > > +
> > > +union drm_amdgpu_sem {
> > > +	struct drm_amdgpu_sem_in in;
> > > +	struct drm_amdgpu_sem_out out;
> > > +};
> > > +
> > >   #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
> > >   #define AMDGPU_GEM_OP_SET_PLACEMENT		1
> > > @@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
> > >   #define AMDGPU_CHUNK_ID_IB		0x01
> > >   #define AMDGPU_CHUNK_ID_FENCE		0x02
> > >   #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
> > > +#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
> > > +#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
> > >   struct drm_amdgpu_cs_chunk {
> > >   	__u32		chunk_id;
> > > @@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
> > >   	__u32 offset;
> > >   };
> > > +struct drm_amdgpu_cs_chunk_sem {
> > > +	__u32 handle;
> > > +};
> > > +
> > >   struct drm_amdgpu_cs_chunk_data {
> > >   	union {
> > >   		struct drm_amdgpu_cs_chunk_ib		ib_data;
> > > -- 
> > > 2.7.4
> > > 
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [rfc/repost] amdgpu/sync_file shared semaphores
@ 2017-03-20  7:03 Dave Airlie
  2017-03-20  7:03 ` [PATCH 2/4] sync_file: add replace and export some functionality (v2) Dave Airlie
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Dave Airlie @ 2017-03-20  7:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

This is a repost of the file_sync semaphore support.

The main difference in this patch is patch1 does a lot
better at handling NULL fences in some places. The poll code
and ioctls should handle ending up with fence being NULL properly now.

Dave.

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)
       [not found] ` <20170320070307.18344-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-20  7:03   ` Dave Airlie
       [not found]     ` <20170320070307.18344-2-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-03-20  7:03   ` [PATCH 3/4] amdgpu/cs: split out fence dependency checking Dave Airlie
  1 sibling, 1 reply; 14+ messages in thread
From: Dave Airlie @ 2017-03-20  7:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Dave Airlie <airlied@redhat.com>

This patch allows the underlying fence in a sync_file to be changed
or set to NULL. This isn't currently required but for Vulkan
semaphores we need to be able to swap and reset the fence.

In order to faciliate this, it uses rcu to protect the fence,
along with a new mutex. The mutex also protects the callback.
It also checks for NULL when retrieving the rcu protected
fence in case it has been reset.

v1.1: fix the locking (Julia Lawall).
v2: use rcu try one
v3: fix poll to use proper rcu, fixup merge/fill ioctls
to not crash on NULL fence cases.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/dma-buf/sync_file.c | 112 +++++++++++++++++++++++++++++++++++---------
 include/linux/sync_file.h   |   5 +-
 2 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 2321035..dcba931 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -28,6 +28,10 @@
 
 static const struct file_operations sync_file_fops;
 
+#define sync_file_held(obj) lockdep_is_held(&(obj)->lock)
+#define sync_file_assert_held(obj) \
+	lockdep_assert_held(&(obj)->lock)
+
 static struct sync_file *sync_file_alloc(void)
 {
 	struct sync_file *sync_file;
@@ -47,6 +51,9 @@ static struct sync_file *sync_file_alloc(void)
 
 	INIT_LIST_HEAD(&sync_file->cb.node);
 
+	RCU_INIT_POINTER(sync_file->fence, NULL);
+
+	mutex_init(&sync_file->lock);
 	return sync_file;
 
 err:
@@ -80,7 +87,9 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
 	if (!sync_file)
 		return NULL;
 
-	sync_file->fence = dma_fence_get(fence);
+	dma_fence_get(fence);
+
+	RCU_INIT_POINTER(sync_file->fence, fence);
 
 	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
 		 fence->ops->get_driver_name(fence),
@@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
 	if (!sync_file)
 		return NULL;
 
-	fence = dma_fence_get(sync_file->fence);
+	if (!rcu_access_pointer(sync_file->fence))
+		return NULL;
+
+	rcu_read_lock();
+	fence = dma_fence_get_rcu_safe(&sync_file->fence);
+	rcu_read_unlock();
+
 	fput(sync_file->file);
 
 	return fence;
 }
 EXPORT_SYMBOL(sync_file_get_fence);
 
+static inline struct dma_fence *
+sync_file_get_fence_locked(struct sync_file *sync_file)
+{
+	return rcu_dereference_protected(sync_file->fence,
+					 sync_file_held(sync_file));
+}
+
 static int sync_file_set_fence(struct sync_file *sync_file,
 			       struct dma_fence **fences, int num_fences)
 {
@@ -143,7 +165,7 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 	 * we own the reference of the dma_fence_array creation.
 	 */
 	if (num_fences == 1) {
-		sync_file->fence = fences[0];
+		RCU_INIT_POINTER(sync_file->fence, fences[0]);
 		kfree(fences);
 	} else {
 		array = dma_fence_array_create(num_fences, fences,
@@ -152,17 +174,25 @@ static int sync_file_set_fence(struct sync_file *sync_file,
 		if (!array)
 			return -ENOMEM;
 
-		sync_file->fence = &array->base;
+		RCU_INIT_POINTER(sync_file->fence, &array->base);
 	}
 
 	return 0;
 }
 
+/* must be called with sync_file lock taken */
 static struct dma_fence **get_fences(struct sync_file *sync_file,
 				     int *num_fences)
 {
-	if (dma_fence_is_array(sync_file->fence)) {
-		struct dma_fence_array *array = to_dma_fence_array(sync_file->fence);
+	struct dma_fence *fence = sync_file_get_fence_locked(sync_file);
+
+	if (!fence) {
+		*num_fences = 0;
+		return NULL;
+	}
+
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
 
 		*num_fences = array->num_fences;
 		return array->fences;
@@ -204,10 +234,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 	if (!sync_file)
 		return NULL;
 
+	mutex_lock(&a->lock);
+	mutex_lock(&b->lock);
 	a_fences = get_fences(a, &a_num_fences);
 	b_fences = get_fences(b, &b_num_fences);
-	if (a_num_fences > INT_MAX - b_num_fences)
-		return NULL;
+	if (!a_num_fences || !b_num_fences)
+		goto unlock;
+
+	if (a_num_fences > INT_MAX - b_num_fences) {
+		goto unlock;
+	}
 
 	num_fences = a_num_fences + b_num_fences;
 
@@ -268,11 +304,17 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
 		goto err;
 	}
 
+	mutex_unlock(&b->lock);
+	mutex_unlock(&a->lock);
+
 	strlcpy(sync_file->name, name, sizeof(sync_file->name));
 	return sync_file;
 
 err:
 	fput(sync_file->file);
+unlock:
+	mutex_unlock(&b->lock);
+	mutex_unlock(&a->lock);
 	return NULL;
 
 }
@@ -281,10 +323,15 @@ static void sync_file_free(struct kref *kref)
 {
 	struct sync_file *sync_file = container_of(kref, struct sync_file,
 						     kref);
+	struct dma_fence *fence;
+
+	fence = rcu_dereference_protected(sync_file->fence, 1);
+	if (fence) {
+		if (test_bit(POLL_ENABLED, &fence->flags))
+			dma_fence_remove_callback(fence, &sync_file->cb);
+		dma_fence_put(fence);
+	}
 
-	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
-		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
-	dma_fence_put(sync_file->fence);
 	kfree(sync_file);
 }
 
@@ -299,16 +346,25 @@ static int sync_file_release(struct inode *inode, struct file *file)
 static unsigned int sync_file_poll(struct file *file, poll_table *wait)
 {
 	struct sync_file *sync_file = file->private_data;
+	unsigned int ret_val = 0;
+	struct dma_fence *fence;
 
 	poll_wait(file, &sync_file->wq, wait);
 
-	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
-		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
-					   fence_check_cb_func) < 0)
-			wake_up_all(&sync_file->wq);
+	mutex_lock(&sync_file->lock);
+
+	fence = sync_file_get_fence_locked(sync_file);
+	if (fence) {
+		if (!test_and_set_bit(POLL_ENABLED, &fence->flags)) {
+			if (dma_fence_add_callback(fence, &sync_file->cb,
+						   fence_check_cb_func) < 0)
+				wake_up_all(&sync_file->wq);
+		}
+		ret_val = dma_fence_is_signaled(fence) ? POLLIN : 0;
 	}
+	mutex_unlock(&sync_file->lock);
 
-	return dma_fence_is_signaled(sync_file->fence) ? POLLIN : 0;
+	return ret_val;
 }
 
 static long sync_file_ioctl_merge(struct sync_file *sync_file,
@@ -393,8 +449,13 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 	if (info.flags || info.pad)
 		return -EINVAL;
 
+	mutex_lock(&sync_file->lock);
 	fences = get_fences(sync_file, &num_fences);
 
+	/* if there are no fences in the sync_file just return */
+	if (!num_fences)
+		goto no_fences;
+
 	/*
 	 * Passing num_fences = 0 means that userspace doesn't want to
 	 * retrieve any sync_fence_info. If num_fences = 0 we skip filling
@@ -404,13 +465,17 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 	if (!info.num_fences)
 		goto no_fences;
 
-	if (info.num_fences < num_fences)
-		return -EINVAL;
+	if (info.num_fences < num_fences) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	size = num_fences * sizeof(*fence_info);
 	fence_info = kzalloc(size, GFP_KERNEL);
-	if (!fence_info)
-		return -ENOMEM;
+	if (!fence_info) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	for (i = 0; i < num_fences; i++)
 		sync_fill_fence_info(fences[i], &fence_info[i]);
@@ -423,7 +488,10 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 
 no_fences:
 	strlcpy(info.name, sync_file->name, sizeof(info.name));
-	info.status = dma_fence_is_signaled(sync_file->fence);
+	if (num_fences)
+		info.status = dma_fence_is_signaled(sync_file->fence);
+	else
+		info.status = -ENOENT;
 	info.num_fences = num_fences;
 
 	if (copy_to_user((void __user *)arg, &info, sizeof(info)))
@@ -433,7 +501,7 @@ static long sync_file_ioctl_fence_info(struct sync_file *sync_file,
 
 out:
 	kfree(fence_info);
-
+	mutex_unlock(&sync_file->lock);
 	return ret;
 }
 
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 3e3ab84..006412f 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -30,6 +30,7 @@
  * @wq:			wait queue for fence signaling
  * @fence:		fence with the fences in the sync_file
  * @cb:			fence callback information
+ * @lock:               mutex to protect fence/cb - used for semaphores
  */
 struct sync_file {
 	struct file		*file;
@@ -41,8 +42,10 @@ struct sync_file {
 
 	wait_queue_head_t	wq;
 
-	struct dma_fence	*fence;
+	struct dma_fence __rcu	*fence;
 	struct dma_fence_cb cb;
+	/* protects the fence pointer and cb */
+	struct mutex lock;
 };
 
 #define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/4] sync_file: add replace and export some functionality (v2)
  2017-03-20  7:03 [rfc/repost] amdgpu/sync_file shared semaphores Dave Airlie
@ 2017-03-20  7:03 ` Dave Airlie
       [not found] ` <20170320070307.18344-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: Dave Airlie @ 2017-03-20  7:03 UTC (permalink / raw)
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

Using sync_file to back vulkan semaphores means need to replace
the fence underlying the sync file. This replace function removes
the callback, swaps the fence, and returns the old one. This
also exports the alloc and fdget functionality for the semaphore
wrapper code.

v2: use rcu.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/dma-buf/sync_file.c | 42 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/sync_file.h   |  5 ++++-
 2 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index dcba931..d826066 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -32,7 +32,14 @@ static const struct file_operations sync_file_fops;
 #define sync_file_assert_held(obj) \
 	lockdep_assert_held(&(obj)->lock)
 
-static struct sync_file *sync_file_alloc(void)
+/**
+ * sync_file_alloc() - allocate an unfenced sync file
+ *
+ * Creates a sync_file.
+ * The sync_file can be released with fput(sync_file->file).
+ * Returns the sync_file or NULL in case of error.
+ */
+struct sync_file *sync_file_alloc(void)
 {
 	struct sync_file *sync_file;
 
@@ -60,6 +67,7 @@ static struct sync_file *sync_file_alloc(void)
 	kfree(sync_file);
 	return NULL;
 }
+EXPORT_SYMBOL(sync_file_alloc);
 
 static void fence_check_cb_func(struct dma_fence *f, struct dma_fence_cb *cb)
 {
@@ -100,7 +108,7 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
 }
 EXPORT_SYMBOL(sync_file_create);
 
-static struct sync_file *sync_file_fdget(int fd)
+struct sync_file *sync_file_fdget(int fd)
 {
 	struct file *file = fget(fd);
 
@@ -116,6 +124,7 @@ static struct sync_file *sync_file_fdget(int fd)
 	fput(file);
 	return NULL;
 }
+EXPORT_SYMBOL(sync_file_fdget);
 
 /**
  * sync_file_get_fence - get the fence related to the sync_file fd
@@ -153,6 +162,35 @@ sync_file_get_fence_locked(struct sync_file *sync_file)
 					 sync_file_held(sync_file));
 }
 
+/**
+ * sync_file_replace_fence - replace the fence related to the sync_file
+ * @sync_file:	 sync file to replace fence in
+ * @fence: fence to replace with (or NULL for no fence).
+ * Returns previous fence.
+ */
+struct dma_fence *sync_file_replace_fence(struct sync_file *sync_file,
+					  struct dma_fence *fence)
+{
+	struct dma_fence *ret_fence = NULL;
+
+	if (fence)
+		dma_fence_get(fence);
+
+	mutex_lock(&sync_file->lock);
+
+	ret_fence = sync_file_get_fence_locked(sync_file);
+	if (ret_fence) {
+		if (test_bit(POLL_ENABLED, &ret_fence->flags))
+			dma_fence_remove_callback(ret_fence, &sync_file->cb);
+	}
+
+	RCU_INIT_POINTER(sync_file->fence, fence);
+
+	mutex_unlock(&sync_file->lock);
+	return ret_fence;
+}
+EXPORT_SYMBOL(sync_file_replace_fence);
+
 static int sync_file_set_fence(struct sync_file *sync_file,
 			       struct dma_fence **fences, int num_fences)
 {
diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h
index 006412f..555ae99 100644
--- a/include/linux/sync_file.h
+++ b/include/linux/sync_file.h
@@ -50,7 +50,10 @@ struct sync_file {
 
 #define POLL_ENABLED DMA_FENCE_FLAG_USER_BITS
 
+struct sync_file *sync_file_alloc(void);
 struct sync_file *sync_file_create(struct dma_fence *fence);
 struct dma_fence *sync_file_get_fence(int fd);
-
+struct sync_file *sync_file_fdget(int fd);
+struct dma_fence *sync_file_replace_fence(struct sync_file *sync_file,
+					  struct dma_fence *fence);
 #endif /* _LINUX_SYNC_H */
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/4] amdgpu/cs: split out fence dependency checking
       [not found] ` <20170320070307.18344-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-03-20  7:03   ` [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3) Dave Airlie
@ 2017-03-20  7:03   ` Dave Airlie
  1 sibling, 0 replies; 14+ messages in thread
From: Dave Airlie @ 2017-03-20  7:03 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Dave Airlie <airlied@redhat.com>

This just splits out the fence depenency checking into it's
own function to make it easier to add semaphore dependencies.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 86 +++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 99424cb..4671432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -963,56 +963,66 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
-				  struct amdgpu_cs_parser *p)
+static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
+				    struct amdgpu_cs_parser *p,
+				    struct amdgpu_cs_chunk *chunk)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	int i, j, r;
-
-	for (i = 0; i < p->nchunks; ++i) {
-		struct drm_amdgpu_cs_chunk_dep *deps;
-		struct amdgpu_cs_chunk *chunk;
-		unsigned num_deps;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_dep *deps;
 
-		chunk = &p->chunks[i];
+	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_dep);
 
-		if (chunk->chunk_id != AMDGPU_CHUNK_ID_DEPENDENCIES)
-			continue;
+	for (i = 0; i < num_deps; ++i) {
+		struct amdgpu_ring *ring;
+		struct amdgpu_ctx *ctx;
+		struct dma_fence *fence;
 
-		deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
-		num_deps = chunk->length_dw * 4 /
-			sizeof(struct drm_amdgpu_cs_chunk_dep);
+		r = amdgpu_cs_get_ring(adev, deps[i].ip_type,
+				       deps[i].ip_instance,
+				       deps[i].ring, &ring);
+		if (r)
+			return r;
 
-		for (j = 0; j < num_deps; ++j) {
-			struct amdgpu_ring *ring;
-			struct amdgpu_ctx *ctx;
-			struct dma_fence *fence;
+		ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+		if (ctx == NULL)
+			return -EINVAL;
 
-			r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
-					       deps[j].ip_instance,
-					       deps[j].ring, &ring);
+		fence = amdgpu_ctx_get_fence(ctx, ring,
+					     deps[i].handle);
+		if (IS_ERR(fence)) {
+			r = PTR_ERR(fence);
+			amdgpu_ctx_put(ctx);
+			return r;
+		} else if (fence) {
+			r = amdgpu_sync_fence(adev, &p->job->sync,
+					      fence);
+			dma_fence_put(fence);
+			amdgpu_ctx_put(ctx);
 			if (r)
 				return r;
+		}
+	}
+	return 0;
+}
 
-			ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id);
-			if (ctx == NULL)
-				return -EINVAL;
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+				  struct amdgpu_cs_parser *p)
+{
+	int i, r;
 
-			fence = amdgpu_ctx_get_fence(ctx, ring,
-						     deps[j].handle);
-			if (IS_ERR(fence)) {
-				r = PTR_ERR(fence);
-				amdgpu_ctx_put(ctx);
-				return r;
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
 
-			} else if (fence) {
-				r = amdgpu_sync_fence(adev, &p->job->sync,
-						      fence);
-				dma_fence_put(fence);
-				amdgpu_ctx_put(ctx);
-				if (r)
-					return r;
-			}
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
+			r = amdgpu_process_fence_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/4] amdgpu: use sync file for shared semaphores
  2017-03-20  7:03 [rfc/repost] amdgpu/sync_file shared semaphores Dave Airlie
  2017-03-20  7:03 ` [PATCH 2/4] sync_file: add replace and export some functionality (v2) Dave Airlie
       [not found] ` <20170320070307.18344-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-20  7:03 ` Dave Airlie
  2017-03-20  8:42 ` [rfc/repost] amdgpu/sync_file " Daniel Vetter
  3 siblings, 0 replies; 14+ messages in thread
From: Dave Airlie @ 2017-03-20  7:03 UTC (permalink / raw)
  To: amd-gfx, dri-devel

From: Dave Airlie <airlied@redhat.com>

This creates a new interface for amdgpu with ioctls to
create/destroy/import and export shared semaphores using
sem object backed by the sync_file code. The semaphores
are not installed as fd (except for export), but rather
like other driver internal objects in an idr. The idr
holds the initial reference on the sync file.

The command submission interface is enhanced with two new
chunks, one for semaphore waiting, one for semaphore signalling
and just takes a list of handles for each.

This is based on work originally done by David Zhou at AMD,
with input from Christian Konig on what things should look like.

NOTE: this interface addition needs a version bump to expose
it to userspace.

v1.1: keep file reference on import.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/amd/amdgpu/Makefile     |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  12 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  70 ++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |   8 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c | 203 ++++++++++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  28 +++++
 6 files changed, 321 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2814aad..404bcba 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -24,7 +24,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
 	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
-	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
+	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_sem.o
 
 # add asic specific block
 amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b9135..4ed8811 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -702,6 +702,8 @@ struct amdgpu_fpriv {
 	struct mutex		bo_list_lock;
 	struct idr		bo_list_handles;
 	struct amdgpu_ctx_mgr	ctx_mgr;
+	spinlock_t		sem_handles_lock;
+	struct idr		sem_handles;
 };
 
 /*
@@ -1814,5 +1816,15 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		       uint64_t addr, struct amdgpu_bo **bo);
 int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser);
 
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp);
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle);
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence);
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle);
 #include "amdgpu_object.h"
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 4671432..80fc94b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -217,6 +217,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			break;
 
 		case AMDGPU_CHUNK_ID_DEPENDENCIES:
+		case AMDGPU_CHUNK_ID_SEM_WAIT:
+		case AMDGPU_CHUNK_ID_SEM_SIGNAL:
 			break;
 
 		default:
@@ -1009,6 +1011,28 @@ static int amdgpu_process_fence_dep(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int amdgpu_process_sem_wait_dep(struct amdgpu_device *adev,
+				       struct amdgpu_cs_parser *p,
+				       struct amdgpu_cs_chunk *chunk)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_sync(adev, fpriv, &p->job->sync,
+					       deps[i].handle);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
 static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 				  struct amdgpu_cs_parser *p)
 {
@@ -1023,12 +1047,56 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
 			r = amdgpu_process_fence_dep(adev, p, chunk);
 			if (r)
 				return r;
+		} else if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_WAIT) {
+			r = amdgpu_process_sem_wait_dep(adev, p, chunk);
+			if (r)
+				return r;
 		}
 	}
 
 	return 0;
 }
 
+static int amdgpu_process_sem_signal_dep(struct amdgpu_cs_parser *p,
+					 struct amdgpu_cs_chunk *chunk,
+					 struct dma_fence *fence)
+{
+	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	unsigned num_deps;
+	int i, r;
+	struct drm_amdgpu_cs_chunk_sem *deps;
+
+	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+	num_deps = chunk->length_dw * 4 /
+		sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+	for (i = 0; i < num_deps; ++i) {
+		r = amdgpu_sem_lookup_and_signal(fpriv, deps[i].handle,
+						 fence);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
+static int amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
+{
+	int i, r;
+
+	for (i = 0; i < p->nchunks; ++i) {
+		struct amdgpu_cs_chunk *chunk;
+
+		chunk = &p->chunks[i];
+
+		if (chunk->chunk_id == AMDGPU_CHUNK_ID_SEM_SIGNAL) {
+			r = amdgpu_process_sem_signal_dep(p, chunk, p->fence);
+			if (r)
+				return r;
+		}
+	}
+	return 0;
+}
+
 static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 			    union drm_amdgpu_cs *cs)
 {
@@ -1056,7 +1124,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
-	return 0;
+	return amdgpu_cs_post_dependencies(p);
 }
 
 int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 61d94c7..013aed1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -664,6 +664,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	mutex_init(&fpriv->bo_list_lock);
 	idr_init(&fpriv->bo_list_handles);
 
+	spin_lock_init(&fpriv->sem_handles_lock);
+	idr_init(&fpriv->sem_handles);
 	amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
 
 	file_priv->driver_priv = fpriv;
@@ -689,6 +691,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	struct amdgpu_device *adev = dev->dev_private;
 	struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
 	struct amdgpu_bo_list *list;
+	struct amdgpu_sem *sem;
 	int handle;
 
 	if (!fpriv)
@@ -715,6 +718,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	idr_destroy(&fpriv->bo_list_handles);
 	mutex_destroy(&fpriv->bo_list_lock);
 
+	idr_for_each_entry(&fpriv->sem_handles, sem, handle)
+		amdgpu_sem_destroy(fpriv, handle);
+	idr_destroy(&fpriv->sem_handles);
+
 	kfree(fpriv);
 	file_priv->driver_priv = NULL;
 
@@ -896,6 +903,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_VA, amdgpu_gem_va_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_OP, amdgpu_gem_op_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(AMDGPU_SEM, amdgpu_sem_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
new file mode 100644
index 0000000..94a637f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sem.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chunming Zhou <david1.zhou@amd.com>
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/poll.h>
+#include <linux/seq_file.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/anon_inodes.h>
+#include <linux/sync_file.h>
+#include "amdgpu.h"
+#include <drm/drmP.h>
+
+static inline struct sync_file *amdgpu_sync_file_lookup(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file;
+
+	spin_lock(&fpriv->sem_handles_lock);
+
+	/* Check if we currently have a reference on the object */
+	sync_file = idr_find(&fpriv->sem_handles, handle);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	return sync_file;
+}
+
+static int amdgpu_sem_create(struct amdgpu_fpriv *fpriv, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_alloc();
+	int ret;
+
+	if (!sync_file)
+		return -ENOMEM;
+
+	snprintf(sync_file->name, sizeof(sync_file->name), "sync_sem");
+
+	/* we get a file reference and we use that in the idr. */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	if (ret < 0)
+		return ret;
+
+	*handle = ret;
+	return 0;
+}
+
+void amdgpu_sem_destroy(struct amdgpu_fpriv *fpriv, u32 handle)
+{
+	struct sync_file *sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return;
+
+	spin_lock(&fpriv->sem_handles_lock);
+	idr_remove(&fpriv->sem_handles, handle);
+	spin_unlock(&fpriv->sem_handles_lock);
+
+	fput(sync_file->file);
+}
+
+
+int amdgpu_sem_lookup_and_signal(struct amdgpu_fpriv *fpriv,
+				 uint32_t handle,
+				 struct dma_fence *fence)
+{
+	struct sync_file *sync_file;
+	struct dma_fence *old_fence;
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	old_fence = sync_file_replace_fence(sync_file, fence);
+	dma_fence_put(old_fence);
+	return 0;
+}
+
+static int amdgpu_sem_import(struct amdgpu_fpriv *fpriv,
+				       int fd, u32 *handle)
+{
+	struct sync_file *sync_file = sync_file_fdget(fd);
+	int ret;
+
+	if (!sync_file)
+		return -EINVAL;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&fpriv->sem_handles_lock);
+
+	ret = idr_alloc(&fpriv->sem_handles, sync_file, 1, 0, GFP_NOWAIT);
+
+	spin_unlock(&fpriv->sem_handles_lock);
+	idr_preload_end();
+
+	if (ret < 0)
+		goto err_out;
+
+	*handle = ret;
+	return 0;
+err_out:
+	return ret;
+
+}
+
+static int amdgpu_sem_export(struct amdgpu_fpriv *fpriv,
+			     u32 handle, int *fd)
+{
+	struct sync_file *sync_file;
+	int ret;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	ret = get_unused_fd_flags(O_CLOEXEC);
+	if (ret < 0)
+		goto err_put_file;
+
+	fd_install(ret, sync_file->file);
+
+	*fd = ret;
+	return 0;
+err_put_file:
+	return ret;
+}
+
+int amdgpu_sem_lookup_and_sync(struct amdgpu_device *adev,
+			       struct amdgpu_fpriv *fpriv,
+			       struct amdgpu_sync *sync,
+			       uint32_t handle)
+{
+	int r;
+	struct sync_file *sync_file;
+	struct dma_fence *fence;
+
+	sync_file = amdgpu_sync_file_lookup(fpriv, handle);
+	if (!sync_file)
+		return -EINVAL;
+
+	fence = sync_file_replace_fence(sync_file, NULL);
+	r = amdgpu_sync_fence(adev, sync, fence);
+	dma_fence_put(fence);
+
+	return r;
+}
+
+int amdgpu_sem_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *filp)
+{
+	union drm_amdgpu_sem *args = data;
+	struct amdgpu_fpriv *fpriv = filp->driver_priv;
+	int r = 0;
+
+	switch (args->in.op) {
+	case AMDGPU_SEM_OP_CREATE_SEM:
+		r = amdgpu_sem_create(fpriv, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_IMPORT_SEM:
+		r = amdgpu_sem_import(fpriv, args->in.handle, &args->out.handle);
+		break;
+	case AMDGPU_SEM_OP_EXPORT_SEM:
+		r = amdgpu_sem_export(fpriv, args->in.handle, &args->out.fd);
+		break;
+	case AMDGPU_SEM_OP_DESTROY_SEM:
+		amdgpu_sem_destroy(fpriv, args->in.handle);
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 5797283..646b103 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -51,6 +51,7 @@ extern "C" {
 #define DRM_AMDGPU_GEM_OP		0x10
 #define DRM_AMDGPU_GEM_USERPTR		0x11
 #define DRM_AMDGPU_WAIT_FENCES		0x12
+#define DRM_AMDGPU_SEM                  0x13
 
 #define DRM_IOCTL_AMDGPU_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
 #define DRM_IOCTL_AMDGPU_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
@@ -65,6 +66,7 @@ extern "C" {
 #define DRM_IOCTL_AMDGPU_GEM_OP		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_OP, struct drm_amdgpu_gem_op)
 #define DRM_IOCTL_AMDGPU_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_USERPTR, struct drm_amdgpu_gem_userptr)
 #define DRM_IOCTL_AMDGPU_WAIT_FENCES	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_WAIT_FENCES, union drm_amdgpu_wait_fences)
+#define DRM_IOCTL_AMDGPU_SEM		DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_SEM, union drm_amdgpu_sem)
 
 #define AMDGPU_GEM_DOMAIN_CPU		0x1
 #define AMDGPU_GEM_DOMAIN_GTT		0x2
@@ -335,6 +337,26 @@ union drm_amdgpu_wait_fences {
 	struct drm_amdgpu_wait_fences_out out;
 };
 
+#define AMDGPU_SEM_OP_CREATE_SEM 0
+#define AMDGPU_SEM_OP_IMPORT_SEM 1
+#define AMDGPU_SEM_OP_EXPORT_SEM 2
+#define AMDGPU_SEM_OP_DESTROY_SEM 3
+
+struct drm_amdgpu_sem_in {
+	__u32 op;
+	__u32 handle;
+};
+
+struct drm_amdgpu_sem_out {
+	__u32 fd;
+	__u32 handle;
+};
+
+union drm_amdgpu_sem {
+	struct drm_amdgpu_sem_in in;
+	struct drm_amdgpu_sem_out out;
+};
+
 #define AMDGPU_GEM_OP_GET_GEM_CREATE_INFO	0
 #define AMDGPU_GEM_OP_SET_PLACEMENT		1
 
@@ -390,6 +412,8 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_IB		0x01
 #define AMDGPU_CHUNK_ID_FENCE		0x02
 #define AMDGPU_CHUNK_ID_DEPENDENCIES	0x03
+#define AMDGPU_CHUNK_ID_SEM_WAIT        0x04
+#define AMDGPU_CHUNK_ID_SEM_SIGNAL      0x05
 
 struct drm_amdgpu_cs_chunk {
 	__u32		chunk_id;
@@ -454,6 +478,10 @@ struct drm_amdgpu_cs_chunk_fence {
 	__u32 offset;
 };
 
+struct drm_amdgpu_cs_chunk_sem {
+	__u32 handle;
+};
+
 struct drm_amdgpu_cs_chunk_data {
 	union {
 		struct drm_amdgpu_cs_chunk_ib		ib_data;
-- 
2.7.4

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)
       [not found]     ` <20170320070307.18344-2-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-03-20  8:16       ` Chris Wilson
  2017-03-20  8:41         ` Daniel Vetter
  2017-03-20  8:21       ` Chris Wilson
  1 sibling, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2017-03-20  8:16 UTC (permalink / raw)
  To: Dave Airlie
  Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Mar 20, 2017 at 05:03:04PM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
> 
> This patch allows the underlying fence in a sync_file to be changed
> or set to NULL. This isn't currently required but for Vulkan
> semaphores we need to be able to swap and reset the fence.
> 
> In order to faciliate this, it uses rcu to protect the fence,
> along with a new mutex. The mutex also protects the callback.
> It also checks for NULL when retrieving the rcu protected
> fence in case it has been reset.
> 
> v1.1: fix the locking (Julia Lawall).
> v2: use rcu try one
> v3: fix poll to use proper rcu, fixup merge/fill ioctls
> to not crash on NULL fence cases.
> 
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
> @@ -80,7 +87,9 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
>  	if (!sync_file)
>  		return NULL;
>  
> -	sync_file->fence = dma_fence_get(fence);
> +	dma_fence_get(fence);
> +
> +	RCU_INIT_POINTER(sync_file->fence, fence);
>  
>  	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
>  		 fence->ops->get_driver_name(fence),
> @@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
>  	if (!sync_file)
>  		return NULL;
>  
> -	fence = dma_fence_get(sync_file->fence);
> +	if (!rcu_access_pointer(sync_file->fence))
> +		return NULL;

Missed fput.

> +
> +	rcu_read_lock();
> +	fence = dma_fence_get_rcu_safe(&sync_file->fence);
> +	rcu_read_unlock();
> +
>  	fput(sync_file->file);
>  
>  	return fence;
>  }
>  EXPORT_SYMBOL(sync_file_get_fence);
>  
> @@ -204,10 +234,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
>  	if (!sync_file)
>  		return NULL;
>  
> +	mutex_lock(&a->lock);
> +	mutex_lock(&b->lock);

This allows userspace to trigger lockdep (just merge the same pair of
sync_files again in opposite order). if (b < a) swap(a, b); ?

>  	a_fences = get_fences(a, &a_num_fences);
>  	b_fences = get_fences(b, &b_num_fences);
> -	if (a_num_fences > INT_MAX - b_num_fences)
> -		return NULL;
> +	if (!a_num_fences || !b_num_fences)
> +		goto unlock;
> +
> +	if (a_num_fences > INT_MAX - b_num_fences) {
> +		goto unlock;
> +	}
>  
>  	num_fences = a_num_fences + b_num_fences;
>  
> @@ -281,10 +323,15 @@ static void sync_file_free(struct kref *kref)
>  {
>  	struct sync_file *sync_file = container_of(kref, struct sync_file,
>  						     kref);
> +	struct dma_fence *fence;
> +

Somewhere, here?, it would be useful to add a comment that the rcu
delayed free is provided by fput.

> +	fence = rcu_dereference_protected(sync_file->fence, 1);
> +	if (fence) {
> +		if (test_bit(POLL_ENABLED, &fence->flags))
> +			dma_fence_remove_callback(fence, &sync_file->cb);
> +		dma_fence_put(fence);
> +	}
>  
> -	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
> -		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
> -	dma_fence_put(sync_file->fence);
>  	kfree(sync_file);
>  }
>  
> @@ -299,16 +346,25 @@ static int sync_file_release(struct inode *inode, struct file *file)
>  static unsigned int sync_file_poll(struct file *file, poll_table *wait)
>  {
>  	struct sync_file *sync_file = file->private_data;
> +	unsigned int ret_val = 0;
> +	struct dma_fence *fence;
>  
>  	poll_wait(file, &sync_file->wq, wait);
>  
> -	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
> -		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
> -					   fence_check_cb_func) < 0)
> -			wake_up_all(&sync_file->wq);
> +	mutex_lock(&sync_file->lock);
> +
> +	fence = sync_file_get_fence_locked(sync_file);

Why do you need the locked version here and not just the rcu variant?

> +	if (fence) {
> +		if (!test_and_set_bit(POLL_ENABLED, &fence->flags)) {
> +			if (dma_fence_add_callback(fence, &sync_file->cb,
> +						   fence_check_cb_func) < 0)
> +				wake_up_all(&sync_file->wq);
> +		}
> +		ret_val = dma_fence_is_signaled(fence) ? POLLIN : 0;
>  	}
> +	mutex_unlock(&sync_file->lock);

So an empty sync_file is incomplete and blocks forever? Why? It's the
opposite behaviour to e.g. reservation_object so a quick explanation of
how that is used by VkSemaphore will cement the differences.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)
       [not found]     ` <20170320070307.18344-2-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-03-20  8:16       ` Chris Wilson
@ 2017-03-20  8:21       ` Chris Wilson
  1 sibling, 0 replies; 14+ messages in thread
From: Chris Wilson @ 2017-03-20  8:21 UTC (permalink / raw)
  To: Dave Airlie
  Cc: dri-devel-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Mon, Mar 20, 2017 at 05:03:04PM +1000, Dave Airlie wrote:
> From: Dave Airlie <airlied@redhat.com>
> 
> This patch allows the underlying fence in a sync_file to be changed
> or set to NULL. This isn't currently required but for Vulkan
> semaphores we need to be able to swap and reset the fence.
> 
> In order to faciliate this, it uses rcu to protect the fence,
> along with a new mutex. The mutex also protects the callback.
> It also checks for NULL when retrieving the rcu protected
> fence in case it has been reset.
> 
> v1.1: fix the locking (Julia Lawall).
> v2: use rcu try one
> v3: fix poll to use proper rcu, fixup merge/fill ioctls
> to not crash on NULL fence cases.
> 
> Signed-off-by: Dave Airlie <airlied@redhat.com>
> ---
> @@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
>  	if (!sync_file)
>  		return NULL;
>  
> -	fence = dma_fence_get(sync_file->fence);
> +	if (!rcu_access_pointer(sync_file->fence))
> +		return NULL;
> +
> +	rcu_read_lock();
> +	fence = dma_fence_get_rcu_safe(&sync_file->fence);
> +	rcu_read_unlock();
> +
>  	fput(sync_file->file);

So poll will wait until the fence is set before the sync_file is
signaled, but here we return NULL. At the moment this is interpretted by
the callers as an error (since we can't distinguish between the lookup
error and the empty sync_file). However, if it is empty we also want to
delay the dependent execution until the fence is set to match the poll
semantics.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3)
  2017-03-20  8:16       ` Chris Wilson
@ 2017-03-20  8:41         ` Daniel Vetter
  0 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2017-03-20  8:41 UTC (permalink / raw)
  To: Chris Wilson, Dave Airlie, amd-gfx, dri-devel

On Mon, Mar 20, 2017 at 08:16:36AM +0000, Chris Wilson wrote:
> On Mon, Mar 20, 2017 at 05:03:04PM +1000, Dave Airlie wrote:
> > From: Dave Airlie <airlied@redhat.com>
> > 
> > This patch allows the underlying fence in a sync_file to be changed
> > or set to NULL. This isn't currently required but for Vulkan
> > semaphores we need to be able to swap and reset the fence.
> > 
> > In order to faciliate this, it uses rcu to protect the fence,
> > along with a new mutex. The mutex also protects the callback.
> > It also checks for NULL when retrieving the rcu protected
> > fence in case it has been reset.
> > 
> > v1.1: fix the locking (Julia Lawall).
> > v2: use rcu try one
> > v3: fix poll to use proper rcu, fixup merge/fill ioctls
> > to not crash on NULL fence cases.
> > 
> > Signed-off-by: Dave Airlie <airlied@redhat.com>
> > ---
> > @@ -80,7 +87,9 @@ struct sync_file *sync_file_create(struct dma_fence *fence)
> >  	if (!sync_file)
> >  		return NULL;
> >  
> > -	sync_file->fence = dma_fence_get(fence);
> > +	dma_fence_get(fence);
> > +
> > +	RCU_INIT_POINTER(sync_file->fence, fence);
> >  
> >  	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
> >  		 fence->ops->get_driver_name(fence),
> > @@ -124,13 +133,26 @@ struct dma_fence *sync_file_get_fence(int fd)
> >  	if (!sync_file)
> >  		return NULL;
> >  
> > -	fence = dma_fence_get(sync_file->fence);
> > +	if (!rcu_access_pointer(sync_file->fence))
> > +		return NULL;
> 
> Missed fput.
> 
> > +
> > +	rcu_read_lock();
> > +	fence = dma_fence_get_rcu_safe(&sync_file->fence);
> > +	rcu_read_unlock();
> > +
> >  	fput(sync_file->file);
> >  
> >  	return fence;
> >  }
> >  EXPORT_SYMBOL(sync_file_get_fence);
> >  
> > @@ -204,10 +234,16 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
> >  	if (!sync_file)
> >  		return NULL;
> >  
> > +	mutex_lock(&a->lock);
> > +	mutex_lock(&b->lock);
> 
> This allows userspace to trigger lockdep (just merge the same pair of
> sync_files again in opposite order). if (b < a) swap(a, b); ?

Do we even need the look? 1) rcu-lookup the fences (which are invariant)
2) merge them 3) create new syncfile for them. I don't see a need for
taking locks here.

> 
> >  	a_fences = get_fences(a, &a_num_fences);
> >  	b_fences = get_fences(b, &b_num_fences);
> > -	if (a_num_fences > INT_MAX - b_num_fences)
> > -		return NULL;
> > +	if (!a_num_fences || !b_num_fences)
> > +		goto unlock;
> > +
> > +	if (a_num_fences > INT_MAX - b_num_fences) {
> > +		goto unlock;
> > +	}
> >  
> >  	num_fences = a_num_fences + b_num_fences;
> >  
> > @@ -281,10 +323,15 @@ static void sync_file_free(struct kref *kref)
> >  {
> >  	struct sync_file *sync_file = container_of(kref, struct sync_file,
> >  						     kref);
> > +	struct dma_fence *fence;
> > +
> 
> Somewhere, here?, it would be useful to add a comment that the rcu
> delayed free is provided by fput.
> 
> > +	fence = rcu_dereference_protected(sync_file->fence, 1);
> > +	if (fence) {
> > +		if (test_bit(POLL_ENABLED, &fence->flags))
> > +			dma_fence_remove_callback(fence, &sync_file->cb);
> > +		dma_fence_put(fence);
> > +	}
> >  
> > -	if (test_bit(POLL_ENABLED, &sync_file->fence->flags))
> > -		dma_fence_remove_callback(sync_file->fence, &sync_file->cb);
> > -	dma_fence_put(sync_file->fence);
> >  	kfree(sync_file);
> >  }
> >  
> > @@ -299,16 +346,25 @@ static int sync_file_release(struct inode *inode, struct file *file)
> >  static unsigned int sync_file_poll(struct file *file, poll_table *wait)
> >  {
> >  	struct sync_file *sync_file = file->private_data;
> > +	unsigned int ret_val = 0;
> > +	struct dma_fence *fence;
> >  
> >  	poll_wait(file, &sync_file->wq, wait);
> >  
> > -	if (!test_and_set_bit(POLL_ENABLED, &sync_file->fence->flags)) {
> > -		if (dma_fence_add_callback(sync_file->fence, &sync_file->cb,
> > -					   fence_check_cb_func) < 0)
> > -			wake_up_all(&sync_file->wq);
> > +	mutex_lock(&sync_file->lock);
> > +
> > +	fence = sync_file_get_fence_locked(sync_file);
> 
> Why do you need the locked version here and not just the rcu variant?

+1 :-) I think the lock should only be needed when you update the fence,
everywhere else we should be able to get away with rcu.

> > +	if (fence) {
> > +		if (!test_and_set_bit(POLL_ENABLED, &fence->flags)) {
> > +			if (dma_fence_add_callback(fence, &sync_file->cb,
> > +						   fence_check_cb_func) < 0)
> > +				wake_up_all(&sync_file->wq);
> > +		}
> > +		ret_val = dma_fence_is_signaled(fence) ? POLLIN : 0;
> >  	}
> > +	mutex_unlock(&sync_file->lock);
> 
> So an empty sync_file is incomplete and blocks forever? Why? It's the
> opposite behaviour to e.g. reservation_object so a quick explanation of
> how that is used by VkSemaphore will cement the differences.

Well if we reuse this for future fences then "not yet signalled" is kinda
what we want. In the end it doesn't matter until we wire up things
properly (i.e. block in sync_file_get_fence until the fence shows up).

Cheers, Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [rfc/repost] amdgpu/sync_file shared semaphores
  2017-03-20  7:03 [rfc/repost] amdgpu/sync_file shared semaphores Dave Airlie
                   ` (2 preceding siblings ...)
  2017-03-20  7:03 ` [PATCH 4/4] amdgpu: use sync file for shared semaphores Dave Airlie
@ 2017-03-20  8:42 ` Daniel Vetter
  3 siblings, 0 replies; 14+ messages in thread
From: Daniel Vetter @ 2017-03-20  8:42 UTC (permalink / raw)
  To: Dave Airlie; +Cc: dri-devel, amd-gfx

On Mon, Mar 20, 2017 at 05:03:03PM +1000, Dave Airlie wrote:
> This is a repost of the file_sync semaphore support.
> 
> The main difference in this patch is patch1 does a lot
> better at handling NULL fences in some places. The poll code
> and ioctls should handle ending up with fence being NULL properly now.

btw on this I realized that sync_file_get_fence can already return NULL,
so no problem really and no need for a dummy fence. Silly me :-)
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-03-20  8:42 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-03-20  7:03 [rfc/repost] amdgpu/sync_file shared semaphores Dave Airlie
2017-03-20  7:03 ` [PATCH 2/4] sync_file: add replace and export some functionality (v2) Dave Airlie
     [not found] ` <20170320070307.18344-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-20  7:03   ` [PATCH 1/4] sync_file: add a mutex to protect fence and callback members. (v3) Dave Airlie
     [not found]     ` <20170320070307.18344-2-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-20  8:16       ` Chris Wilson
2017-03-20  8:41         ` Daniel Vetter
2017-03-20  8:21       ` Chris Wilson
2017-03-20  7:03   ` [PATCH 3/4] amdgpu/cs: split out fence dependency checking Dave Airlie
2017-03-20  7:03 ` [PATCH 4/4] amdgpu: use sync file for shared semaphores Dave Airlie
2017-03-20  8:42 ` [rfc/repost] amdgpu/sync_file " Daniel Vetter
  -- strict thread matches above, loose matches on Subject: below --
2017-03-15  4:27 sync_file rcu adoption and semaphore changes Dave Airlie
     [not found] ` <20170315042749.13259-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-15  4:27   ` [PATCH 4/4] amdgpu: use sync file for shared semaphores Dave Airlie
2017-03-14  0:50 [rfc] amdgpu/sync_file " Dave Airlie
     [not found] ` <20170314005054.7487-1-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-14  0:50   ` [PATCH 4/4] amdgpu: use sync file for " Dave Airlie
     [not found]     ` <20170314005054.7487-6-airlied-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-03-15  9:01       ` Daniel Vetter
     [not found]         ` <20170315090129.fneo5gafrz2jec32-dv86pmgwkMBes7Z6vYuT8azUEOm+Xw19@public.gmane.org>
2017-03-15  9:43           ` Christian König
2017-03-15  9:57             ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).