On Thu, Sep 18, 2014 at 11:11:57PM -0400, Jerome Glisse wrote: > On Thu, Sep 18, 2014 at 08:42:16PM -0400, Jerome Glisse wrote: > > On Thu, Sep 18, 2014 at 05:30:12PM +0200, Christian König wrote: > > > From: Christian König > > > > > > This patch adds a new 64bit ID as a result to each command submission. > > > > NAK for design reasons. > > > > First and foremost we DO NOT WANT TO ALLOCATE ANYTHING for submission id. > > I know that hooking up to fences makes life easy but this is not how it > > should be done. > > > > What you want to expose is the per ring 32bit seq value and report to user > > space the value and the ring id. To handle the wrap around you add a unique > > 32bit wrap counter which is incremented every 1 << 30 or some big number > > seq value number (but must be smaller than 1 << 31). Then you use arithmetic > > to determine if a cs seq number is done or not (ie the seq wrap around > > counter diff should not be bigger than 2 or 3) and the seq number should > > obviously be after again using arithmetic diff like jiffies code. > > > > All this being hidden in the kernel all the user space has to know is : > > 32 bit seq value per ring > > 32 bit ring unique id > > 32 wrap counter per ring > > > > With such scheme you never allocate anything or worry about any fence. > > Way simpler and less code. > > > > Cheers, > > Jérôme > > Because code is clearer than words attached is a patch of what i am thinking > about. The arithmetic should be right as well as the coherency and proper > memory barrier. Though it is completely untested and will require a couple of > fixes for properly checking userspace arguments and other aspects (see FIXME > in patches). > > There is no allocation, it is a pretty small patch, and it provides a fast > lightweight solution. While the new ioctl argument and return value can be > improved (like reporting more information to userspace like for instance > warning userspace when it is querying very old fences). 
I think the overall > design is sound. > > Let me know. Ok actually after a beer (given that the Planet Express manual for Bender, which also applies to me, clearly stipulates to expect malfunction if not properly inebriated) it became clear that by reversing the problem I could make it a lot simpler. So attached is an even simpler patch that gracefully handles user space querying for very old fences (i.e. ones that could have wrapped around). Code explains it all. > > Cheers, > Jérôme > > > > > > > > > > > Signed-off-by: Christian König > > > --- > > > drivers/gpu/drm/radeon/Makefile | 2 +- > > > drivers/gpu/drm/radeon/radeon.h | 13 +- > > > drivers/gpu/drm/radeon/radeon_cs.c | 13 ++ > > > drivers/gpu/drm/radeon/radeon_kms.c | 41 +++---- > > > drivers/gpu/drm/radeon/radeon_seq.c | 229 ++++++++++++++++++++++++++++++++++++ > > > include/uapi/drm/radeon_drm.h | 1 + > > > 6 files changed, 277 insertions(+), 22 deletions(-) > > > create mode 100644 drivers/gpu/drm/radeon/radeon_seq.c > > > > > > diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile > > > index 8cc9f68..21ee928 100644 > > > --- a/drivers/gpu/drm/radeon/Makefile > > > +++ b/drivers/gpu/drm/radeon/Makefile > > > @@ -81,7 +81,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ > > > rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ > > > trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ > > > ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o \ > > > - radeon_sync.o > > > + radeon_sync.o radeon_seq.o > > > > > > # add async DMA block > > > radeon-y += \ > > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > > > index b3b4e96..dbfd346 100644 > > > --- a/drivers/gpu/drm/radeon/radeon.h > > > +++ b/drivers/gpu/drm/radeon/radeon.h > > > @@ -423,6 +423,15 @@ static inline bool radeon_fence_is_earlier(struct radeon_fence *a, > > > } > > > > > > /* > > > + * Userspace command submission identifier 
generation > > > + */ > > > +struct radeon_seq; > > > + > > > +uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence); > > > +struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id); > > > +void radeon_seq_destroy(struct radeon_seq **seq); > > > + > > > +/* > > > * Tiling registers > > > */ > > > struct radeon_surface_reg { > > > @@ -946,7 +955,9 @@ struct radeon_vm_manager { > > > * file private structure > > > */ > > > struct radeon_fpriv { > > > - struct radeon_vm vm; > > > + struct radeon_vm vm; > > > + struct mutex seq_lock; > > > + struct radeon_seq *seq; > > > }; > > > > > > /* > > > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > > > index b8f20a5..0c0f0b3 100644 > > > --- a/drivers/gpu/drm/radeon/radeon_cs.c > > > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > > > @@ -413,6 +413,19 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo > > > unsigned i; > > > > > > if (!error) { > > > + if (parser->chunk_flags && > > > + parser->chunk_flags->length_dw > 4) { > > > + struct radeon_fpriv *fpriv = parser->filp->driver_priv; > > > + uint32_t __user *to = parser->chunk_flags->user_ptr; > > > + uint64_t id; > > > + > > > + mutex_lock(&fpriv->seq_lock); > > > + id = radeon_seq_push(&fpriv->seq, parser->ib.fence); > > > + mutex_unlock(&fpriv->seq_lock); > > > + > > > + copy_to_user(&to[3], &id, sizeof(uint64_t)); > > > + } > > > + > > > /* Sort the buffer list from the smallest to largest buffer, > > > * which affects the order of buffers in the LRU list. 
> > > * This assures that the smallest buffers are added first > > > diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c > > > index 85ee6f7..38880af 100644 > > > --- a/drivers/gpu/drm/radeon/radeon_kms.c > > > +++ b/drivers/gpu/drm/radeon/radeon_kms.c > > > @@ -578,39 +578,34 @@ void radeon_driver_lastclose_kms(struct drm_device *dev) > > > */ > > > int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) > > > { > > > + struct radeon_fpriv *fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); > > > struct radeon_device *rdev = dev->dev_private; > > > int r; > > > > > > - file_priv->driver_priv = NULL; > > > + if (unlikely(!fpriv)) > > > + return -ENOMEM; > > > + > > > + file_priv->driver_priv = fpriv; > > > > > > r = pm_runtime_get_sync(dev->dev); > > > if (r < 0) > > > - return r; > > > + goto error; > > > > > > /* new gpu have virtual address space support */ > > > if (rdev->family >= CHIP_CAYMAN) { > > > - struct radeon_fpriv *fpriv; > > > struct radeon_vm *vm; > > > int r; > > > > > > - fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); > > > - if (unlikely(!fpriv)) { > > > - return -ENOMEM; > > > - } > > > - > > > vm = &fpriv->vm; > > > r = radeon_vm_init(rdev, vm); > > > - if (r) { > > > - kfree(fpriv); > > > - return r; > > > - } > > > + if (r) > > > + goto error; > > > > > > if (rdev->accel_working) { > > > r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); > > > if (r) { > > > radeon_vm_fini(rdev, vm); > > > - kfree(fpriv); > > > - return r; > > > + goto error; > > > } > > > > > > /* map the ib pool buffer read only into > > > @@ -623,16 +618,20 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) > > > RADEON_VM_PAGE_SNOOPED); > > > if (r) { > > > radeon_vm_fini(rdev, vm); > > > - kfree(fpriv); > > > - return r; > > > + goto error; > > > } > > > } > > > - file_priv->driver_priv = fpriv; > > > } > > > > > > + mutex_init(&fpriv->seq_lock); > > > + > > > 
pm_runtime_mark_last_busy(dev->dev); > > > pm_runtime_put_autosuspend(dev->dev); > > > return 0; > > > + > > > +error: > > > + kfree(fpriv); > > > + return r; > > > } > > > > > > /** > > > @@ -646,11 +645,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) > > > void radeon_driver_postclose_kms(struct drm_device *dev, > > > struct drm_file *file_priv) > > > { > > > + struct radeon_fpriv *fpriv = file_priv->driver_priv; > > > struct radeon_device *rdev = dev->dev_private; > > > > > > + radeon_seq_destroy(&fpriv->seq); > > > + > > > /* new gpu have virtual address space support */ > > > if (rdev->family >= CHIP_CAYMAN && file_priv->driver_priv) { > > > - struct radeon_fpriv *fpriv = file_priv->driver_priv; > > > struct radeon_vm *vm = &fpriv->vm; > > > int r; > > > > > > @@ -664,9 +665,9 @@ void radeon_driver_postclose_kms(struct drm_device *dev, > > > } > > > > > > radeon_vm_fini(rdev, vm); > > > - kfree(fpriv); > > > - file_priv->driver_priv = NULL; > > > } > > > + kfree(fpriv); > > > + file_priv->driver_priv = NULL; > > > } > > > > > > /** > > > diff --git a/drivers/gpu/drm/radeon/radeon_seq.c b/drivers/gpu/drm/radeon/radeon_seq.c > > > new file mode 100644 > > > index 0000000..d8857f1 > > > --- /dev/null > > > +++ b/drivers/gpu/drm/radeon/radeon_seq.c > > > @@ -0,0 +1,229 @@ > > > +/* > > > + * Copyright 2014 Advanced Micro Devices, Inc. > > > + * All Rights Reserved. 
> > > + * > > > + * Permission is hereby granted, free of charge, to any person obtaining a > > > + * copy of this software and associated documentation files (the > > > + * "Software"), to deal in the Software without restriction, including > > > + * without limitation the rights to use, copy, modify, merge, publish, > > > + * distribute, sub license, and/or sell copies of the Software, and to > > > + * permit persons to whom the Software is furnished to do so, subject to > > > + * the following conditions: > > > + * > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL > > > + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, > > > + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR > > > + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE > > > + * USE OR OTHER DEALINGS IN THE SOFTWARE. > > > + * > > > + * The above copyright notice and this permission notice (including the > > > + * next paragraph) shall be included in all copies or substantial portions > > > + * of the Software. > > > + * > > > + */ > > > +/* > > > + * Authors: > > > + * Christian König > > > + */ > > > + > > > +#include > > > +#include "radeon.h" > > > + > > > +/* > > > + * ID sequences > > > + * This code generates a 64bit identifier for a command submission. > > > + * It works by adding the fence of the command submission to a automatically > > > + * resizing ring buffer. 
> > > + */ > > > + > > > +struct radeon_seq { > > > + uint64_t start; > > > + uint64_t end; > > > + uint64_t mask; > > > + struct radeon_seq *replacement; > > > +}; > > > + > > > +/** > > > + * radeon_seq_create - create a new sequence object > > > + * > > > + * @start: start value for this sequence > > > + * @size: size of the ring buffer, must be power of two > > > + * > > > + * Allocate and initialize a new ring buffer and header. > > > + * Returns NULL if allocation fails, new object otherwise. > > > + */ > > > +static struct radeon_seq *radeon_seq_create(uint64_t start, unsigned size) > > > +{ > > > + unsigned bytes = sizeof(struct radeon_seq) + > > > + size * sizeof(struct radeon_fence *); > > > + > > > + struct radeon_seq *seq; > > > + > > > + seq = kmalloc(bytes, GFP_KERNEL); > > > + if (!seq) > > > + return NULL; > > > + > > > + seq->start = start; > > > + seq->end = start; > > > + seq->mask = size - 1; > > > + seq->replacement = NULL; > > > + > > > + return seq; > > > +} > > > + > > > +/** > > > + * radeon_seq_ring - get pointer to ring buffer > > > + * > > > + * @seq: sequence object > > > + * > > > + * Calculate the address of the ring buffer. > > > + */ > > > +static struct radeon_fence **radeon_seq_ring(struct radeon_seq *seq) > > > +{ > > > + return (struct radeon_fence **)&seq[1]; > > > +} > > > + > > > +/** > > > + * radeon_seq_try_free - try to free fences from the ring buffer > > > + * > > > + * @seq: sequence object > > > + * > > > + * Try to free fences from the start of the ring buffer. 
> > > + */ > > > +static void radeon_seq_try_free(struct radeon_seq *seq) > > > +{ > > > + struct radeon_fence **ring = radeon_seq_ring(seq); > > > + > > > + while (seq->start != seq->end) { > > > + unsigned idx = seq->start & seq->mask; > > > + struct radeon_fence *fence = ring[idx]; > > > + > > > + if (!radeon_fence_signaled(fence)) > > > + break; > > > + > > > + radeon_fence_unref(&fence); > > > + ++seq->start; > > > + } > > > +} > > > + > > > +/** > > > + * radeon_seq_add - add new fence to the end of the ring buffer > > > + * > > > + * @seq: sequence object > > > + * @f: the fence object > > > + * > > > + * Add the fence and return the generated ID. > > > + */ > > > +static uint64_t radeon_seq_add(struct radeon_seq *seq, struct radeon_fence *f) > > > +{ > > > + struct radeon_fence **ring = radeon_seq_ring(seq); > > > + > > > + ring[seq->end & seq->mask] = radeon_fence_ref(f); > > > + return seq->end++; > > > +} > > > + > > > +/** > > > + * radeon_seq_push - check for room and add the fence > > > + * > > > + * @seq: sequence object > > > + * @fence: the fence object > > > + * > > > + * Check for room on the ring buffer, if there isn't enough > > > + * reallocate the sequence object and add the fence. > > > + * Returns the generated ID. 
> > > + */ > > > +uint64_t radeon_seq_push(struct radeon_seq **seq, struct radeon_fence *fence) > > > +{ > > > + unsigned size_for_new_seq = 4; > > > + uint64_t start_for_new_seq = 1; > > > + > > > + if (*seq) { > > > + /* try to release old replacements */ > > > + while ((*seq)->replacement) { > > > + radeon_seq_try_free(*seq); > > > + if ((*seq)->start == (*seq)->end) { > > > + struct radeon_seq *repl = (*seq)->replacement; > > > + > > > + kfree(*seq); > > > + *seq = repl; > > > + } else { > > > + /* move on to the current container */ > > > + seq = &(*seq)->replacement; > > > + } > > > + } > > > + > > > + /* check if we have enough room for one more fence */ > > > + radeon_seq_try_free(*seq); > > > + if (((*seq)->end - (*seq)->start) <= (*seq)->mask) > > > + return radeon_seq_add(*seq, fence); > > > + > > > + /* not enough room, let's allocate a replacement */ > > > + size_for_new_seq = ((*seq)->mask + 1) * 2; > > > + start_for_new_seq = (*seq)->end + 1; > > > + seq = &(*seq)->replacement; > > > + } > > > + > > > + *seq = radeon_seq_create(start_for_new_seq, size_for_new_seq); > > > + if (!*seq) { > > > + /* not enough memory for a new sequence object, but failing > > > + here isn't a good idea either cause the commands are already > > > + submitted to the hardware. So just block on the fence. */ > > > + int r = radeon_fence_wait(fence, false); > > > + if (r) > > > + DRM_ERROR("Error waiting for fence (%d)\n", r); > > > + return 0; > > > + } > > > + return radeon_seq_add(*seq, fence); > > > +} > > > + > > > +/** > > > + * radeon_seq_query - lockup fence by it's ID > > > + * > > > + * @seq: sequence object > > > + * @id: the generated ID > > > + * > > > + * Lockup the associated fence by it's ID. > > > + * Returns fence object or NULL if it couldn't be found. 
> > > + */ > > > +struct radeon_fence *radeon_seq_query(struct radeon_seq *seq, uint64_t id) > > > +{ > > > + struct radeon_fence **ring; > > > + > > > + while (seq && id > seq->end) > > > + seq = seq->replacement; > > > + > > > + if (!seq || id < seq->start) > > > + return NULL; > > > + > > > + ring = radeon_seq_ring(seq); > > > + return ring[id & seq->mask]; > > > +} > > > + > > > +/** > > > + * radeon_seq_destroy - destroy the sequence object > > > + * > > > + * @seq_ptr: pointer to sequence object > > > + * > > > + * Destroy the sequence objects and release all fence references taken. > > > + */ > > > +void radeon_seq_destroy(struct radeon_seq **seq_ptr) > > > +{ > > > + struct radeon_seq *seq = *seq_ptr; > > > + while (seq) { > > > + struct radeon_seq *repl = seq->replacement; > > > + unsigned start = seq->start & seq->mask; > > > + unsigned end = seq->end & seq->mask; > > > + struct radeon_fence **ring; > > > + unsigned i; > > > + > > > + ring = radeon_seq_ring(seq); > > > + for (i = start; i < end; ++i) > > > + radeon_fence_unref(&ring[i]); > > > + > > > + kfree(seq); > > > + seq = repl; > > > + } > > > + *seq_ptr = NULL; > > > +} > > > diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h > > > index 50d0fb4..6b2b2e7 100644 > > > --- a/include/uapi/drm/radeon_drm.h > > > +++ b/include/uapi/drm/radeon_drm.h > > > @@ -959,6 +959,7 @@ struct drm_radeon_gem_va { > > > #define RADEON_CS_RING_VCE 4 > > > /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ > > > /* 0 = normal, + = higher priority, - = lower priority */ > > > +/* The fourth and fives dword are a 64bit fence ID generated for this CS */ > > > > > > struct drm_radeon_cs_chunk { > > > uint32_t chunk_id; > > > -- > > > 1.9.1 > > > > > > _______________________________________________ > > > dri-devel mailing list > > > dri-devel@lists.freedesktop.org > > > http://lists.freedesktop.org/mailman/listinfo/dri-devel > From 
b95fe503dad89875da9db2d11d05f93eca41232d Mon Sep 17 00:00:00 2001 > From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= > Date: Thu, 18 Sep 2014 22:51:21 -0400 > Subject: [PATCH] drm/radeon: cs sequence id and cs completion query. > MIME-Version: 1.0 > Content-Type: text/plain; charset=UTF-8 > Content-Transfer-Encoding: 8bit > > This reports back to userspace the ring id and sequence number that can > be used by userspace to query for the completion of the cs on the > hardware. This also adds a new ioctl to perform such a query. > > There is an extra value supplied to userspace, the wrap counter, that > is used to detect wrap around and to gracefully handle the case where > userspace might be querying about a very, very, very old cs executed > a long time ago in a far far away universe. > > This patch is aimed at introducing the necessary groundwork for user > space explicit synchronization. By allowing userspace to query about > cs completion on hardware, user space can perform operations and > synchronization on buffers by itself without having the cs ioctl > implicitly wait for older cs completion before scheduling new cs. > This part is however left to a follow-up patch. 
> > Signed-off-by: Jérôme Glisse > --- > drivers/gpu/drm/radeon/radeon.h | 2 ++ > drivers/gpu/drm/radeon/radeon_cs.c | 60 +++++++++++++++++++++++++++++++++++ > drivers/gpu/drm/radeon/radeon_fence.c | 4 +++ > drivers/gpu/drm/radeon/radeon_kms.c | 1 + > include/uapi/drm/radeon_drm.h | 9 ++++++ > 5 files changed, 76 insertions(+) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 5f05b4c..1f1dd1f 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -118,6 +118,7 @@ extern int radeon_bapm; > #define RADEON_DEBUGFS_MAX_COMPONENTS 32 > #define RADEONFB_CONN_LIMIT 4 > #define RADEON_BIOS_NUM_SCRATCH 8 > +#define RADEON_SEQ_WRAP_VALUE (1 << 30) > > /* fence seq are set to this number when signaled */ > #define RADEON_FENCE_SIGNALED_SEQ 0LL > @@ -355,6 +356,7 @@ struct radeon_fence_driver { > /* sync_seq is protected by ring emission lock */ > uint64_t sync_seq[RADEON_NUM_RINGS]; > atomic64_t last_seq; > + int32_t wrap_seq; > bool initialized; > }; > > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index 83f382e..be4ae25 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -404,6 +404,17 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo > ttm_eu_fence_buffer_objects(&parser->ticket, > &parser->validated, > parser->ib.fence); > + if (parser->chunk_flags && parser->chunk_flags->length_dw > 4) { > + uint32_t __user *to = parser->chunk_flags->user_ptr; > + uint32_t tmp; > + > + tmp = lower_32_bits(parser->ib.fence->seq); > + copy_to_user(&to[3], &tmp, sizeof(uint32_t)); > + tmp = parser->ib.fence->ring; > + copy_to_user(&to[4], &tmp, sizeof(uint32_t)); > + tmp = rdev->fence_drv[tmp]->wrap_seq; > + copy_to_user(&to[5], &tmp, sizeof(uint32_t)); > + } > } else if (backoff) { > ttm_eu_backoff_reservation(&parser->ticket, > &parser->validated); > @@ -823,3 +834,52 @@ int 
radeon_cs_packet_next_reloc(struct radeon_cs_parser *p, > *cs_reloc = p->relocs_ptr[(idx / 4)]; > return 0; > } > + > +int radeon_cs_done_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) > +{ > + struct radeon_device *rdev = dev->dev_private; > + struct drm_radeon_cs_done *args = data; > + unsigned i = args->ring; > + int32_t last_seq, sync_seq, wrap_seq; > + > + /* FIXME check args->ring value is ok. */ > + > + /* > + * The memory barrier is match with the one in radeon_fence_emit() and > + * it insure us that we get the right matching wrap_seq and sync_seq. > + * > + * Note that no need to protect the fence_drv.sync_seq here as barrier > + * insure us we will get the coherency we need. > + */ > + wrap_seq = ACCESS_ONCE(rdev->fence_drv[i].wrap_seq); > + smp_rmb(); > + sync_seq = lower_32_bits(ACCESS_ONCE(rdev->fence_drv[i].sync_seq[i])); > + > + /* > + * So if current wrap_seq and one we are queried with are differ by > + * more than one this means that we are queried about a very old fence > + * seq value and we can assume it is long done. > + * > + * Well this is not entirely true, for it to be true we would need to > + * stall when we increment the wrap counter if cs in previous wrap were > + * not completed but this is highly unlikely. So live with the trill of > + * it going wrong ! > + */ > + if (abs((unsigned)wrap_seq - (unsigned)args->wrap) > 1) > + return 1; > + /* Now check if currently reported fence seq is done or not. */ > + /* FIXME call fence func to update last_seq just in case. */ > + last_seq = lower_32_bits(atomic64_read(&rdev->fence_drv[i].last_seq)); > + if ((last_seq - arg->seq) >= 0) > + return 1; > + /* > + * Last failsafe to handle the horrible case were userspace holded on > + * a wrap and seq value for so long without querying that it we wrapped > + * around. 
This is here to avoid userspace waiting for a fence that was > + * emited a long time ago but the current sync_seq[ring] value migth be > + * stuck and thus we might go bigger than this very old seq value. > + */ > + if (((sync_seq - args->seq) < 0) && args->wrap == wrap_seq) > + return 1; > + return 0; > +} > diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c > index 9137870..a6adcf6 100644 > --- a/drivers/gpu/drm/radeon/radeon_fence.c > +++ b/drivers/gpu/drm/radeon/radeon_fence.c > @@ -119,6 +119,10 @@ int radeon_fence_emit(struct radeon_device *rdev, > kref_init(&((*fence)->kref)); > (*fence)->rdev = rdev; > (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; > + /* Barrier is important for radeon_fence_cs_done_ioctl. */ > + smp_wmb(); > + if (rdev->fence_drv[ring].sync_seq[ring] == RADEON_SEQ_WRAP_VALUE) > + rdev->fence_drv[ring].wrap_seq++; > (*fence)->ring = ring; > radeon_fence_ring_emit(rdev, ring, *fence); > trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); > diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c > index eb7164d..c9cfcf5 100644 > --- a/drivers/gpu/drm/radeon/radeon_kms.c > +++ b/drivers/gpu/drm/radeon/radeon_kms.c > @@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = { > DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), > DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), > + DRM_IOCTL_DEF_DRV(RADEON_CS_DONE, radeon_cs_done_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), > }; > int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms); > diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h > index fea6099..a0c215d 100644 > --- a/include/uapi/drm/radeon_drm.h > +++ b/include/uapi/drm/radeon_drm.h > @@ -554,6 +554,7 @@ typedef struct 
{ > #define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) > #define DRM_IOCTL_RADEON_GEM_VA DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_VA, struct drm_radeon_gem_va) > #define DRM_IOCTL_RADEON_GEM_OP DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_OP, struct drm_radeon_gem_op) > +#define DRM_IOCTL_RADEON_CS_DONE DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS_DONE, struct drm_radeon_cs_done) > > typedef struct drm_radeon_init { > enum { > @@ -936,6 +937,7 @@ struct drm_radeon_gem_va { > #define RADEON_CS_RING_VCE 4 > /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */ > /* 0 = normal, + = higher priority, - = lower priority */ > +/* The fifth, sixth, seventh dword are a 32bit fence ID, ring id and wrap id of this CS */ > > struct drm_radeon_cs_chunk { > uint32_t chunk_id; > @@ -1038,4 +1040,11 @@ struct drm_radeon_info { > > #define CIK_TILE_MODE_DEPTH_STENCIL_1D 5 > > +struct drm_radeon_cs_done { > + int32_t seq; > + int32_t ring; > + int32_t wrap; > + int32_t pad; > +}; > + > #endif > -- > 1.9.3 >