* [PATCH 1/4] drm/vmwgfx: Replace iowrite/ioread with volatile memory accesses
@ 2015-10-30 9:42 Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet Thomas Hellstrom
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Thomas Hellstrom @ 2015-10-30 9:42 UTC (permalink / raw)
To: dri-devel; +Cc: Thomas Hellstrom
Now that we use memremap instead of ioremap, Use WRITE_ONCE / READ_ONCE
instead of iowrite / ioread.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 14 ++---
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 28 ++++++++-
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 24 ++++----
drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 109 ++++++++++++++++------------------
drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 16 ++---
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 9 +--
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 23 +++----
7 files changed, 118 insertions(+), 105 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index bee0a45..d1c34ab 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -752,14 +752,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
dev_priv->active_master = &dev_priv->fbdev_master;
- /*
- * Force __iomem for this mapping until the implied compiler
- * barriers and {READ|WRITE}_ONCE semantics from the
- * io{read|write}32() accessors can be replaced with explicit
- * barriers.
- */
- dev_priv->mmio_virt = (void __iomem *) memremap(dev_priv->mmio_start,
- dev_priv->mmio_size, MEMREMAP_WB);
+ dev_priv->mmio_virt = memremap(dev_priv->mmio_start,
+ dev_priv->mmio_size, MEMREMAP_WB);
if (unlikely(dev_priv->mmio_virt == NULL)) {
ret = -ENOMEM;
@@ -913,7 +907,7 @@ out_no_irq:
out_no_device:
ttm_object_device_release(&dev_priv->tdev);
out_err4:
- memunmap((void __force *) dev_priv->mmio_virt);
+ memunmap(dev_priv->mmio_virt);
out_err3:
vmw_ttm_global_release(dev_priv);
out_err0:
@@ -964,7 +958,7 @@ static int vmw_driver_unload(struct drm_device *dev)
pci_release_regions(dev->pdev);
ttm_object_device_release(&dev_priv->tdev);
- memunmap((void __force *) dev_priv->mmio_virt);
+ memunmap(dev_priv->mmio_virt);
if (dev_priv->ctx.staged_bindings)
vmw_binding_state_free(dev_priv->ctx.staged_bindings);
vmw_ttm_global_release(dev_priv);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a613bd4..198c8b1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -375,7 +375,7 @@ struct vmw_private {
uint32_t stdu_max_height;
uint32_t initial_width;
uint32_t initial_height;
- u32 __iomem *mmio_virt;
+ u32 *mmio_virt;
uint32_t capabilities;
uint32_t max_gmr_ids;
uint32_t max_gmr_pages;
@@ -1206,4 +1206,30 @@ static inline void vmw_fifo_resource_dec(struct vmw_private *dev_priv)
{
atomic_dec(&dev_priv->num_fifo_resources);
}
+
+/**
+ * vmw_mmio_read - Perform a MMIO read from volatile memory
+ *
+ * @addr: The address to read from
+ *
+ * This function is intended to be equivalent to ioread32() on
+ * memremap'd memory, but without byteswapping.
+ */
+static inline u32 vmw_mmio_read(u32 *addr)
+{
+ return READ_ONCE(*addr);
+}
+
+/**
+ * vmw_mmio_write - Perform a MMIO write to volatile memory
+ *
+ * @addr: The address to write to
+ *
+ * This function is intended to be equivalent to iowrite32 on
+ * memremap'd memory, but without byteswapping.
+ */
+static inline void vmw_mmio_write(u32 value, u32 *addr)
+{
+ WRITE_ONCE(*addr, value);
+}
#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 567dded..8e689b4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -142,8 +142,8 @@ static bool vmw_fence_enable_signaling(struct fence *f)
struct vmw_fence_manager *fman = fman_from_fence(fence);
struct vmw_private *dev_priv = fman->dev_priv;
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
- u32 seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+ u32 *fifo_mem = dev_priv->mmio_virt;
+ u32 seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE);
if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
return false;
@@ -386,14 +386,14 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
u32 passed_seqno)
{
u32 goal_seqno;
- u32 __iomem *fifo_mem;
+ u32 *fifo_mem;
struct vmw_fence_obj *fence;
if (likely(!fman->seqno_valid))
return false;
fifo_mem = fman->dev_priv->mmio_virt;
- goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+ goal_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE_GOAL);
if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
return false;
@@ -401,8 +401,8 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
list_for_each_entry(fence, &fman->fence_list, head) {
if (!list_empty(&fence->seq_passed_actions)) {
fman->seqno_valid = true;
- iowrite32(fence->base.seqno,
- fifo_mem + SVGA_FIFO_FENCE_GOAL);
+ vmw_mmio_write(fence->base.seqno,
+ fifo_mem + SVGA_FIFO_FENCE_GOAL);
break;
}
}
@@ -430,18 +430,18 @@ static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
{
struct vmw_fence_manager *fman = fman_from_fence(fence);
u32 goal_seqno;
- u32 __iomem *fifo_mem;
+ u32 *fifo_mem;
if (fence_is_signaled_locked(&fence->base))
return false;
fifo_mem = fman->dev_priv->mmio_virt;
- goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+ goal_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE_GOAL);
if (likely(fman->seqno_valid &&
goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
return false;
- iowrite32(fence->base.seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
+ vmw_mmio_write(fence->base.seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
fman->seqno_valid = true;
return true;
@@ -453,9 +453,9 @@ static void __vmw_fences_update(struct vmw_fence_manager *fman)
struct list_head action_list;
bool needs_rerun;
uint32_t seqno, new_seqno;
- u32 __iomem *fifo_mem = fman->dev_priv->mmio_virt;
+ u32 *fifo_mem = fman->dev_priv->mmio_virt;
- seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+ seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE);
rerun:
list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
if (seqno - fence->base.seqno < VMW_FENCE_WRAP) {
@@ -477,7 +477,7 @@ rerun:
needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
if (unlikely(needs_rerun)) {
- new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+ new_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE);
if (new_seqno != seqno) {
seqno = new_seqno;
goto rerun;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 80c40c3..0cbaf88 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -36,7 +36,7 @@ struct vmw_temp_set_context {
bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 *fifo_mem = dev_priv->mmio_virt;
uint32_t fifo_min, hwversion;
const struct vmw_fifo_state *fifo = &dev_priv->fifo;
@@ -60,15 +60,15 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO))
return false;
- fifo_min = ioread32(fifo_mem + SVGA_FIFO_MIN);
+ fifo_min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN);
if (fifo_min <= SVGA_FIFO_3D_HWVERSION * sizeof(unsigned int))
return false;
- hwversion = ioread32(fifo_mem +
- ((fifo->capabilities &
- SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
- SVGA_FIFO_3D_HWVERSION_REVISED :
- SVGA_FIFO_3D_HWVERSION));
+ hwversion = vmw_mmio_read(fifo_mem +
+ ((fifo->capabilities &
+ SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
+ SVGA_FIFO_3D_HWVERSION_REVISED :
+ SVGA_FIFO_3D_HWVERSION));
if (hwversion == 0)
return false;
@@ -85,13 +85,13 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 *fifo_mem = dev_priv->mmio_virt;
uint32_t caps;
if (!(dev_priv->capabilities & SVGA_CAP_EXTENDED_FIFO))
return false;
- caps = ioread32(fifo_mem + SVGA_FIFO_CAPABILITIES);
+ caps = vmw_mmio_read(fifo_mem + SVGA_FIFO_CAPABILITIES);
if (caps & SVGA_FIFO_CAP_PITCHLOCK)
return true;
@@ -100,7 +100,7 @@ bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv)
int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 *fifo_mem = dev_priv->mmio_virt;
uint32_t max;
uint32_t min;
@@ -137,19 +137,19 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
if (min < PAGE_SIZE)
min = PAGE_SIZE;
- iowrite32(min, fifo_mem + SVGA_FIFO_MIN);
- iowrite32(dev_priv->mmio_size, fifo_mem + SVGA_FIFO_MAX);
+ vmw_mmio_write(min, fifo_mem + SVGA_FIFO_MIN);
+ vmw_mmio_write(dev_priv->mmio_size, fifo_mem + SVGA_FIFO_MAX);
wmb();
- iowrite32(min, fifo_mem + SVGA_FIFO_NEXT_CMD);
- iowrite32(min, fifo_mem + SVGA_FIFO_STOP);
- iowrite32(0, fifo_mem + SVGA_FIFO_BUSY);
+ vmw_mmio_write(min, fifo_mem + SVGA_FIFO_NEXT_CMD);
+ vmw_mmio_write(min, fifo_mem + SVGA_FIFO_STOP);
+ vmw_mmio_write(0, fifo_mem + SVGA_FIFO_BUSY);
mb();
vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, 1);
- max = ioread32(fifo_mem + SVGA_FIFO_MAX);
- min = ioread32(fifo_mem + SVGA_FIFO_MIN);
- fifo->capabilities = ioread32(fifo_mem + SVGA_FIFO_CAPABILITIES);
+ max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX);
+ min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN);
+ fifo->capabilities = vmw_mmio_read(fifo_mem + SVGA_FIFO_CAPABILITIES);
DRM_INFO("Fifo max 0x%08x min 0x%08x cap 0x%08x\n",
(unsigned int) max,
@@ -157,7 +157,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
(unsigned int) fifo->capabilities);
atomic_set(&dev_priv->marker_seq, dev_priv->last_read_seqno);
- iowrite32(dev_priv->last_read_seqno, fifo_mem + SVGA_FIFO_FENCE);
+ vmw_mmio_write(dev_priv->last_read_seqno, fifo_mem + SVGA_FIFO_FENCE);
vmw_marker_queue_init(&fifo->marker_queue);
return 0;
@@ -165,31 +165,23 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
- static DEFINE_SPINLOCK(ping_lock);
- unsigned long irq_flags;
+ u32 *fifo_mem = dev_priv->mmio_virt;
- /*
- * The ping_lock is needed because we don't have an atomic
- * test-and-set of the SVGA_FIFO_BUSY register.
- */
- spin_lock_irqsave(&ping_lock, irq_flags);
- if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) {
- iowrite32(1, fifo_mem + SVGA_FIFO_BUSY);
+ preempt_disable();
+ if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0)
vmw_write(dev_priv, SVGA_REG_SYNC, reason);
- }
- spin_unlock_irqrestore(&ping_lock, irq_flags);
+ preempt_enable();
}
void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 *fifo_mem = dev_priv->mmio_virt;
vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0)
;
- dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+ dev_priv->last_read_seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE);
vmw_write(dev_priv, SVGA_REG_CONFIG_DONE,
dev_priv->config_done_state);
@@ -213,11 +205,11 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
static bool vmw_fifo_is_full(struct vmw_private *dev_priv, uint32_t bytes)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
- uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX);
- uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
- uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN);
- uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP);
+ u32 *fifo_mem = dev_priv->mmio_virt;
+ uint32_t max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX);
+ uint32_t next_cmd = vmw_mmio_read(fifo_mem + SVGA_FIFO_NEXT_CMD);
+ uint32_t min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN);
+ uint32_t stop = vmw_mmio_read(fifo_mem + SVGA_FIFO_STOP);
return ((max - next_cmd) + (stop - min) <= bytes);
}
@@ -321,7 +313,7 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
uint32_t bytes)
{
struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 *fifo_mem = dev_priv->mmio_virt;
uint32_t max;
uint32_t min;
uint32_t next_cmd;
@@ -329,9 +321,9 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
int ret;
mutex_lock(&fifo_state->fifo_mutex);
- max = ioread32(fifo_mem + SVGA_FIFO_MAX);
- min = ioread32(fifo_mem + SVGA_FIFO_MIN);
- next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
+ max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX);
+ min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN);
+ next_cmd = vmw_mmio_read(fifo_mem + SVGA_FIFO_NEXT_CMD);
if (unlikely(bytes >= (max - min)))
goto out_err;
@@ -342,7 +334,7 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
fifo_state->reserved_size = bytes;
while (1) {
- uint32_t stop = ioread32(fifo_mem + SVGA_FIFO_STOP);
+ uint32_t stop = vmw_mmio_read(fifo_mem + SVGA_FIFO_STOP);
bool need_bounce = false;
bool reserve_in_place = false;
@@ -376,8 +368,8 @@ static void *vmw_local_fifo_reserve(struct vmw_private *dev_priv,
fifo_state->using_bounce_buffer = false;
if (reserveable)
- iowrite32(bytes, fifo_mem +
- SVGA_FIFO_RESERVED);
+ vmw_mmio_write(bytes, fifo_mem +
+ SVGA_FIFO_RESERVED);
return (void __force *) (fifo_mem +
(next_cmd >> 2));
} else {
@@ -427,7 +419,7 @@ void *vmw_fifo_reserve_dx(struct vmw_private *dev_priv, uint32_t bytes,
}
static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state,
- u32 __iomem *fifo_mem,
+ u32 *fifo_mem,
uint32_t next_cmd,
uint32_t max, uint32_t min, uint32_t bytes)
{
@@ -439,17 +431,16 @@ static void vmw_fifo_res_copy(struct vmw_fifo_state *fifo_state,
if (bytes < chunk_size)
chunk_size = bytes;
- iowrite32(bytes, fifo_mem + SVGA_FIFO_RESERVED);
+ vmw_mmio_write(bytes, fifo_mem + SVGA_FIFO_RESERVED);
mb();
- memcpy_toio(fifo_mem + (next_cmd >> 2), buffer, chunk_size);
+ memcpy(fifo_mem + (next_cmd >> 2), buffer, chunk_size);
rest = bytes - chunk_size;
if (rest)
- memcpy_toio(fifo_mem + (min >> 2), buffer + (chunk_size >> 2),
- rest);
+ memcpy(fifo_mem + (min >> 2), buffer + (chunk_size >> 2), rest);
}
static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state,
- u32 __iomem *fifo_mem,
+ u32 *fifo_mem,
uint32_t next_cmd,
uint32_t max, uint32_t min, uint32_t bytes)
{
@@ -457,12 +448,12 @@ static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state,
fifo_state->dynamic_buffer : fifo_state->static_buffer;
while (bytes > 0) {
- iowrite32(*buffer++, fifo_mem + (next_cmd >> 2));
+ vmw_mmio_write(*buffer++, fifo_mem + (next_cmd >> 2));
next_cmd += sizeof(uint32_t);
if (unlikely(next_cmd == max))
next_cmd = min;
mb();
- iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD);
+ vmw_mmio_write(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD);
mb();
bytes -= sizeof(uint32_t);
}
@@ -471,10 +462,10 @@ static void vmw_fifo_slow_copy(struct vmw_fifo_state *fifo_state,
static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
{
struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
- uint32_t next_cmd = ioread32(fifo_mem + SVGA_FIFO_NEXT_CMD);
- uint32_t max = ioread32(fifo_mem + SVGA_FIFO_MAX);
- uint32_t min = ioread32(fifo_mem + SVGA_FIFO_MIN);
+ u32 *fifo_mem = dev_priv->mmio_virt;
+ uint32_t next_cmd = vmw_mmio_read(fifo_mem + SVGA_FIFO_NEXT_CMD);
+ uint32_t max = vmw_mmio_read(fifo_mem + SVGA_FIFO_MAX);
+ uint32_t min = vmw_mmio_read(fifo_mem + SVGA_FIFO_MIN);
bool reserveable = fifo_state->capabilities & SVGA_FIFO_CAP_RESERVE;
if (fifo_state->dx)
@@ -507,11 +498,11 @@ static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
if (next_cmd >= max)
next_cmd -= max - min;
mb();
- iowrite32(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD);
+ vmw_mmio_write(next_cmd, fifo_mem + SVGA_FIFO_NEXT_CMD);
}
if (reserveable)
- iowrite32(0, fifo_mem + SVGA_FIFO_RESERVED);
+ vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED);
mb();
up_write(&fifo_state->rwsem);
vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index a3e3c83..b8c6a03 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -64,7 +64,7 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
break;
case DRM_VMW_PARAM_FIFO_HW_VERSION:
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 *fifo_mem = dev_priv->mmio_virt;
const struct vmw_fifo_state *fifo = &dev_priv->fifo;
if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) {
@@ -73,11 +73,11 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
}
param->value =
- ioread32(fifo_mem +
- ((fifo->capabilities &
- SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
- SVGA_FIFO_3D_HWVERSION_REVISED :
- SVGA_FIFO_3D_HWVERSION));
+ vmw_mmio_read(fifo_mem +
+ ((fifo->capabilities &
+ SVGA_FIFO_CAP_3D_HWVERSION_REVISED) ?
+ SVGA_FIFO_3D_HWVERSION_REVISED :
+ SVGA_FIFO_3D_HWVERSION));
break;
}
case DRM_VMW_PARAM_MAX_SURF_MEMORY:
@@ -179,7 +179,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
(struct drm_vmw_get_3d_cap_arg *) data;
struct vmw_private *dev_priv = vmw_priv(dev);
uint32_t size;
- u32 __iomem *fifo_mem;
+ u32 *fifo_mem;
void __user *buffer = (void __user *)((unsigned long)(arg->buffer));
void *bounce;
int ret;
@@ -229,7 +229,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
goto out_err;
} else {
fifo_mem = dev_priv->mmio_virt;
- memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size);
+ memcpy(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size);
}
ret = copy_to_user(buffer, bounce, size);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 9498a5e..ac3eccd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -72,8 +72,8 @@ static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno)
void vmw_update_seqno(struct vmw_private *dev_priv,
struct vmw_fifo_state *fifo_state)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
- uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+ u32 *fifo_mem = dev_priv->mmio_virt;
+ uint32_t seqno = vmw_mmio_read(fifo_mem + SVGA_FIFO_FENCE);
if (dev_priv->last_read_seqno != seqno) {
dev_priv->last_read_seqno = seqno;
@@ -178,8 +178,9 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
}
finish_wait(&dev_priv->fence_queue, &__wait);
if (ret == 0 && fifo_idle) {
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
- iowrite32(signal_seq, fifo_mem + SVGA_FIFO_FENCE);
+ u32 *fifo_mem = dev_priv->mmio_virt;
+
+ vmw_mmio_write(signal_seq, fifo_mem + SVGA_FIFO_FENCE);
}
wake_up_all(&dev_priv->fence_queue);
out_err:
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 03ffab2..a94b24d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -123,14 +123,14 @@ err_unreserve:
void vmw_cursor_update_position(struct vmw_private *dev_priv,
bool show, int x, int y)
{
- u32 __iomem *fifo_mem = dev_priv->mmio_virt;
+ u32 *fifo_mem = dev_priv->mmio_virt;
uint32_t count;
- iowrite32(show ? 1 : 0, fifo_mem + SVGA_FIFO_CURSOR_ON);
- iowrite32(x, fifo_mem + SVGA_FIFO_CURSOR_X);
- iowrite32(y, fifo_mem + SVGA_FIFO_CURSOR_Y);
- count = ioread32(fifo_mem + SVGA_FIFO_CURSOR_COUNT);
- iowrite32(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT);
+ vmw_mmio_write(show ? 1 : 0, fifo_mem + SVGA_FIFO_CURSOR_ON);
+ vmw_mmio_write(x, fifo_mem + SVGA_FIFO_CURSOR_X);
+ vmw_mmio_write(y, fifo_mem + SVGA_FIFO_CURSOR_Y);
+ count = vmw_mmio_read(fifo_mem + SVGA_FIFO_CURSOR_COUNT);
+ vmw_mmio_write(++count, fifo_mem + SVGA_FIFO_CURSOR_COUNT);
}
int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
@@ -1155,7 +1155,8 @@ int vmw_kms_write_svga(struct vmw_private *vmw_priv,
if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
vmw_write(vmw_priv, SVGA_REG_PITCHLOCK, pitch);
else if (vmw_fifo_have_pitchlock(vmw_priv))
- iowrite32(pitch, vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
+ vmw_mmio_write(pitch, vmw_priv->mmio_virt +
+ SVGA_FIFO_PITCHLOCK);
vmw_write(vmw_priv, SVGA_REG_WIDTH, width);
vmw_write(vmw_priv, SVGA_REG_HEIGHT, height);
vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, bpp);
@@ -1181,8 +1182,8 @@ int vmw_kms_save_vga(struct vmw_private *vmw_priv)
vmw_priv->vga_pitchlock =
vmw_read(vmw_priv, SVGA_REG_PITCHLOCK);
else if (vmw_fifo_have_pitchlock(vmw_priv))
- vmw_priv->vga_pitchlock = ioread32(vmw_priv->mmio_virt +
- SVGA_FIFO_PITCHLOCK);
+ vmw_priv->vga_pitchlock = vmw_mmio_read(vmw_priv->mmio_virt +
+ SVGA_FIFO_PITCHLOCK);
if (!(vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY))
return 0;
@@ -1230,8 +1231,8 @@ int vmw_kms_restore_vga(struct vmw_private *vmw_priv)
vmw_write(vmw_priv, SVGA_REG_PITCHLOCK,
vmw_priv->vga_pitchlock);
else if (vmw_fifo_have_pitchlock(vmw_priv))
- iowrite32(vmw_priv->vga_pitchlock,
- vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
+ vmw_mmio_write(vmw_priv->vga_pitchlock,
+ vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
if (!(vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY))
return 0;
--
2.4.3
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet
2015-10-30 9:42 [PATCH 1/4] drm/vmwgfx: Replace iowrite/ioread with volatile memory accesses Thomas Hellstrom
@ 2015-10-30 9:42 ` Thomas Hellstrom
2015-10-30 10:23 ` Daniel Vetter
2015-10-30 9:42 ` [PATCH 3/4] drm/vmwgfx: Relax irq locking somewhat Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 4/4] drm/vmwgfx: Rework locking around register accesses Thomas Hellstrom
2 siblings, 1 reply; 6+ messages in thread
From: Thomas Hellstrom @ 2015-10-30 9:42 UTC (permalink / raw)
To: dri-devel; +Cc: Thomas Hellstrom
Reduce the time in hardware irq context and hardware irq latency.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 108 ++++++++++++++++++++--------------
drivers/gpu/drm/vmwgfx/vmwgfx_fence.h | 2 +
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 6 +-
3 files changed, 68 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 8e689b4..f40c36e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -47,6 +47,7 @@ struct vmw_fence_manager {
bool seqno_valid; /* Protected by @lock, and may not be set to true
without the @goal_irq_mutex held. */
unsigned ctx;
+ struct tasklet_struct tasklet;
};
struct vmw_user_fence {
@@ -81,6 +82,8 @@ struct vmw_event_fence_action {
uint32_t *tv_usec;
};
+static void vmw_fence_tasklet(unsigned long data);
+
static struct vmw_fence_manager *
fman_from_fence(struct vmw_fence_obj *fence)
{
@@ -115,12 +118,11 @@ static void vmw_fence_obj_destroy(struct fence *f)
container_of(f, struct vmw_fence_obj, base);
struct vmw_fence_manager *fman = fman_from_fence(fence);
- unsigned long irq_flags;
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
list_del_init(&fence->head);
--fman->num_fence_objects;
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
fence->destroy(fence);
}
@@ -177,7 +179,6 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
struct vmw_private *dev_priv = fman->dev_priv;
struct vmwgfx_wait_cb cb;
long ret = timeout;
- unsigned long irq_flags;
if (likely(vmw_fence_obj_signaled(fence)))
return timeout;
@@ -185,7 +186,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
vmw_seqno_waiter_add(dev_priv);
- spin_lock_irqsave(f->lock, irq_flags);
+ spin_lock_bh(f->lock);
if (intr && signal_pending(current)) {
ret = -ERESTARTSYS;
@@ -205,11 +206,11 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
__set_current_state(TASK_INTERRUPTIBLE);
else
__set_current_state(TASK_UNINTERRUPTIBLE);
- spin_unlock_irqrestore(f->lock, irq_flags);
+ spin_unlock_bh(f->lock);
ret = schedule_timeout(ret);
- spin_lock_irqsave(f->lock, irq_flags);
+ spin_lock_bh(f->lock);
if (ret > 0 && intr && signal_pending(current))
ret = -ERESTARTSYS;
}
@@ -219,7 +220,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
__set_current_state(TASK_RUNNING);
out:
- spin_unlock_irqrestore(f->lock, irq_flags);
+ spin_unlock_bh(f->lock);
vmw_seqno_waiter_remove(dev_priv);
@@ -300,21 +301,22 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
ttm_round_pot(sizeof(struct vmw_event_fence_action));
mutex_init(&fman->goal_irq_mutex);
fman->ctx = fence_context_alloc(1);
+ tasklet_init(&fman->tasklet, vmw_fence_tasklet,
+ (unsigned long) fman);
return fman;
}
void vmw_fence_manager_takedown(struct vmw_fence_manager *fman)
{
- unsigned long irq_flags;
bool lists_empty;
(void) cancel_work_sync(&fman->work);
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
lists_empty = list_empty(&fman->fence_list) &&
list_empty(&fman->cleanup_list);
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
BUG_ON(!lists_empty);
kfree(fman);
@@ -324,7 +326,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
struct vmw_fence_obj *fence, u32 seqno,
void (*destroy) (struct vmw_fence_obj *fence))
{
- unsigned long irq_flags;
int ret = 0;
fence_init(&fence->base, &vmw_fence_ops, &fman->lock,
@@ -332,7 +333,7 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
INIT_LIST_HEAD(&fence->seq_passed_actions);
fence->destroy = destroy;
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
if (unlikely(fman->fifo_down)) {
ret = -EBUSY;
goto out_unlock;
@@ -341,7 +342,7 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
++fman->num_fence_objects;
out_unlock:
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
return ret;
}
@@ -490,11 +491,9 @@ rerun:
void vmw_fences_update(struct vmw_fence_manager *fman)
{
- unsigned long irq_flags;
-
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
__vmw_fences_update(fman);
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
}
bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence)
@@ -694,11 +693,9 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
{
- unsigned long irq_flags;
-
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
fman->fifo_down = false;
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
}
@@ -825,10 +822,9 @@ void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman,
{
struct vmw_event_fence_action *eaction;
struct drm_pending_event *event;
- unsigned long irq_flags;
while (1) {
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
if (list_empty(event_list))
goto out_unlock;
eaction = list_first_entry(event_list,
@@ -837,11 +833,11 @@ void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman,
list_del_init(&eaction->fpriv_head);
event = eaction->event;
eaction->event = NULL;
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
event->destroy(event);
}
out_unlock:
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
}
@@ -854,7 +850,7 @@ out_unlock:
* This function is called when the seqno of the fence where @action is
* attached has passed. It queues the event on the submitter's event list.
* This function is always called from atomic context, and may be called
- * from irq context.
+ * from tasklet context.
*/
static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
{
@@ -863,13 +859,12 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
struct drm_device *dev = eaction->dev;
struct drm_pending_event *event = eaction->event;
struct drm_file *file_priv;
- unsigned long irq_flags;
if (unlikely(event == NULL))
return;
file_priv = event->file_priv;
- spin_lock_irqsave(&dev->event_lock, irq_flags);
+ spin_lock_bh(&dev->event_lock);
if (likely(eaction->tv_sec != NULL)) {
struct timeval tv;
@@ -883,7 +878,7 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
list_add_tail(&eaction->event->link, &file_priv->event_list);
eaction->event = NULL;
wake_up_all(&file_priv->event_wait);
- spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+ spin_unlock_bh(&dev->event_lock);
}
/**
@@ -900,11 +895,10 @@ static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
struct vmw_event_fence_action *eaction =
container_of(action, struct vmw_event_fence_action, action);
struct vmw_fence_manager *fman = fman_from_fence(eaction->fence);
- unsigned long irq_flags;
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
list_del(&eaction->fpriv_head);
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
vmw_fence_obj_unreference(&eaction->fence);
kfree(eaction);
@@ -924,11 +918,10 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
struct vmw_fence_action *action)
{
struct vmw_fence_manager *fman = fman_from_fence(fence);
- unsigned long irq_flags;
bool run_update = false;
mutex_lock(&fman->goal_irq_mutex);
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
fman->pending_actions[action->type]++;
if (fence_is_signaled_locked(&fence->base)) {
@@ -947,7 +940,7 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
run_update = vmw_fence_goal_check_locked(fence);
}
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
if (run_update) {
if (!fman->goal_irq_on) {
@@ -985,7 +978,6 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv,
struct vmw_event_fence_action *eaction;
struct vmw_fence_manager *fman = fman_from_fence(fence);
struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
- unsigned long irq_flags;
eaction = kzalloc(sizeof(*eaction), GFP_KERNEL);
if (unlikely(eaction == NULL))
@@ -1002,9 +994,9 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv,
eaction->tv_sec = tv_sec;
eaction->tv_usec = tv_usec;
- spin_lock_irqsave(&fman->lock, irq_flags);
+ spin_lock_bh(&fman->lock);
list_add_tail(&eaction->fpriv_head, &vmw_fp->fence_events);
- spin_unlock_irqrestore(&fman->lock, irq_flags);
+ spin_unlock_bh(&fman->lock);
vmw_fence_obj_add_action(fence, &eaction->action);
@@ -1025,16 +1017,15 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv,
struct vmw_event_fence_pending *event;
struct vmw_fence_manager *fman = fman_from_fence(fence);
struct drm_device *dev = fman->dev_priv->dev;
- unsigned long irq_flags;
int ret;
- spin_lock_irqsave(&dev->event_lock, irq_flags);
+ spin_lock_bh(&dev->event_lock);
ret = (file_priv->event_space < sizeof(event->event)) ? -EBUSY : 0;
if (likely(ret == 0))
file_priv->event_space -= sizeof(event->event);
- spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+ spin_unlock_bh(&dev->event_lock);
if (unlikely(ret != 0)) {
DRM_ERROR("Failed to allocate event space for this file.\n");
@@ -1078,9 +1069,9 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv,
out_no_queue:
event->base.destroy(&event->base);
out_no_event:
- spin_lock_irqsave(&dev->event_lock, irq_flags);
+ spin_lock_bh(&dev->event_lock);
file_priv->event_space += sizeof(*event);
- spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+ spin_unlock_bh(&dev->event_lock);
out_no_space:
return ret;
}
@@ -1172,3 +1163,32 @@ out_no_ref_obj:
vmw_fence_obj_unreference(&fence);
return ret;
}
+
+/**
+ * vmw_fence_tasklet - Fence manager tasklet entry point
+ *
+ * @data: The tasklet closure - A pointer to the fence manager cast to an
+ * unsigned long.
+ */
+static void vmw_fence_tasklet(unsigned long data)
+{
+ struct vmw_fence_manager *fman = (struct vmw_fence_manager *) data;
+
+ spin_lock(&fman->lock);
+ __vmw_fences_update(fman);
+ spin_unlock(&fman->lock);
+ wake_up_all(&fman->dev_priv->fence_queue);
+}
+
+/**
+ * vmw_fence_tasklet_schedule - Schedule a fence manager tasklet run
+ *
+ * @fman: Pointer to a fence manager
+ */
+void vmw_fence_tasklet_schedule(struct vmw_fence_manager *fman)
+{
+ if (!fman)
+ return;
+
+ tasklet_schedule(&fman->tasklet);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
index 8be6c29..e55b2c9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -124,4 +124,6 @@ extern int vmw_event_fence_action_queue(struct drm_file *filee_priv,
uint32_t *tv_sec,
uint32_t *tv_usec,
bool interruptible);
+extern void vmw_fence_tasklet_schedule(struct vmw_fence_manager *fman);
+
#endif /* _VMWGFX_FENCE_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index ac3eccd..b0a6e65 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -48,10 +48,8 @@ irqreturn_t vmw_irq_handler(int irq, void *arg)
return IRQ_NONE;
if (masked_status & (SVGA_IRQFLAG_ANY_FENCE |
- SVGA_IRQFLAG_FENCE_GOAL)) {
- vmw_fences_update(dev_priv->fman);
- wake_up_all(&dev_priv->fence_queue);
- }
+ SVGA_IRQFLAG_FENCE_GOAL))
+ vmw_fence_tasklet_schedule(dev_priv->fman);
if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS)
wake_up_all(&dev_priv->fifo_queue);
--
2.4.3
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 3/4] drm/vmwgfx: Relax irq locking somewhat
2015-10-30 9:42 [PATCH 1/4] drm/vmwgfx: Replace iowrite/ioread with volatile memory accesses Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet Thomas Hellstrom
@ 2015-10-30 9:42 ` Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 4/4] drm/vmwgfx: Rework locking around register accesses Thomas Hellstrom
2 siblings, 0 replies; 6+ messages in thread
From: Thomas Hellstrom @ 2015-10-30 9:42 UTC (permalink / raw)
To: dri-devel; +Cc: Thomas Hellstrom
Relax locking with the goal of reducing the number of locking cycles and
time spent with irqs disabled.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 9 ++-
drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 23 ++------
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 104 ++++++++++-------------------------
4 files changed, 39 insertions(+), 99 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index d1c34ab..a09cf85 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -643,7 +643,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
init_waitqueue_head(&dev_priv->fence_queue);
init_waitqueue_head(&dev_priv->fifo_queue);
dev_priv->fence_queue_waiters = 0;
- atomic_set(&dev_priv->fifo_queue_waiters, 0);
+ dev_priv->fifo_queue_waiters = 0;
dev_priv->used_memory_size = 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 198c8b1..a8ae9df 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -440,13 +440,12 @@ struct vmw_private {
spinlock_t waiter_lock;
int fence_queue_waiters; /* Protected by waiter_lock */
int goal_queue_waiters; /* Protected by waiter_lock */
- int cmdbuf_waiters; /* Protected by irq_lock */
- int error_waiters; /* Protected by irq_lock */
- atomic_t fifo_queue_waiters;
+ int cmdbuf_waiters; /* Protected by waiter_lock */
+ int error_waiters; /* Protected by waiter_lock */
+ int fifo_queue_waiters; /* Protected by waiter_lock */
uint32_t last_read_seqno;
- spinlock_t irq_lock;
struct vmw_fence_manager *fman;
- uint32_t irq_mask;
+ uint32_t irq_mask; /* Updates protected by waiter_lock */
/*
* Device state
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 0cbaf88..a8baf5f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -252,7 +252,6 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
unsigned long timeout)
{
long ret = 1L;
- unsigned long irq_flags;
if (likely(!vmw_fifo_is_full(dev_priv, bytes)))
return 0;
@@ -262,16 +261,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
return vmw_fifo_wait_noirq(dev_priv, bytes,
interruptible, timeout);
- spin_lock(&dev_priv->waiter_lock);
- if (atomic_add_return(1, &dev_priv->fifo_queue_waiters) > 0) {
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- outl(SVGA_IRQFLAG_FIFO_PROGRESS,
- dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
- dev_priv->irq_mask |= SVGA_IRQFLAG_FIFO_PROGRESS;
- vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
- }
- spin_unlock(&dev_priv->waiter_lock);
+ vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FIFO_PROGRESS,
+ &dev_priv->fifo_queue_waiters);
if (interruptible)
ret = wait_event_interruptible_timeout
@@ -287,14 +278,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
else if (likely(ret > 0))
ret = 0;
- spin_lock(&dev_priv->waiter_lock);
- if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) {
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- dev_priv->irq_mask &= ~SVGA_IRQFLAG_FIFO_PROGRESS;
- vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
- }
- spin_unlock(&dev_priv->waiter_lock);
+ vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FIFO_PROGRESS,
+ &dev_priv->fifo_queue_waiters);
return ret;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index b0a6e65..12aaed7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -36,15 +36,13 @@ irqreturn_t vmw_irq_handler(int irq, void *arg)
struct vmw_private *dev_priv = vmw_priv(dev);
uint32_t status, masked_status;
- spin_lock(&dev_priv->irq_lock);
status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
- masked_status = status & dev_priv->irq_mask;
- spin_unlock(&dev_priv->irq_lock);
+ masked_status = status & READ_ONCE(dev_priv->irq_mask);
if (likely(status))
outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
- if (!masked_status)
+ if (!status)
return IRQ_NONE;
if (masked_status & (SVGA_IRQFLAG_ANY_FENCE |
@@ -188,65 +186,51 @@ out_err:
return ret;
}
-void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
+void vmw_generic_waiter_add(struct vmw_private *dev_priv,
+ u32 flag, int *waiter_count)
{
- spin_lock(&dev_priv->waiter_lock);
- if (dev_priv->fence_queue_waiters++ == 0) {
- unsigned long irq_flags;
-
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- outl(SVGA_IRQFLAG_ANY_FENCE,
- dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
- dev_priv->irq_mask |= SVGA_IRQFLAG_ANY_FENCE;
+ spin_lock_bh(&dev_priv->waiter_lock);
+ if ((*waiter_count)++ == 0) {
+ outl(flag, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+ dev_priv->irq_mask |= flag;
vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
}
- spin_unlock(&dev_priv->waiter_lock);
+ spin_unlock_bh(&dev_priv->waiter_lock);
}
-void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
+void vmw_generic_waiter_remove(struct vmw_private *dev_priv,
+ u32 flag, int *waiter_count)
{
- spin_lock(&dev_priv->waiter_lock);
- if (--dev_priv->fence_queue_waiters == 0) {
- unsigned long irq_flags;
-
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- dev_priv->irq_mask &= ~SVGA_IRQFLAG_ANY_FENCE;
+ spin_lock_bh(&dev_priv->waiter_lock);
+ if (--(*waiter_count) == 0) {
+ dev_priv->irq_mask &= ~flag;
vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
}
- spin_unlock(&dev_priv->waiter_lock);
+ spin_unlock_bh(&dev_priv->waiter_lock);
}
+void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
+{
+ vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_ANY_FENCE,
+ &dev_priv->fence_queue_waiters);
+}
+
+void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
+{
+ vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_ANY_FENCE,
+ &dev_priv->fence_queue_waiters);
+}
void vmw_goal_waiter_add(struct vmw_private *dev_priv)
{
- spin_lock(&dev_priv->waiter_lock);
- if (dev_priv->goal_queue_waiters++ == 0) {
- unsigned long irq_flags;
-
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- outl(SVGA_IRQFLAG_FENCE_GOAL,
- dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
- dev_priv->irq_mask |= SVGA_IRQFLAG_FENCE_GOAL;
- vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
- }
- spin_unlock(&dev_priv->waiter_lock);
+ vmw_generic_waiter_add(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+ &dev_priv->goal_queue_waiters);
}
void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
{
- spin_lock(&dev_priv->waiter_lock);
- if (--dev_priv->goal_queue_waiters == 0) {
- unsigned long irq_flags;
-
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- dev_priv->irq_mask &= ~SVGA_IRQFLAG_FENCE_GOAL;
- vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
- }
- spin_unlock(&dev_priv->waiter_lock);
+ vmw_generic_waiter_remove(dev_priv, SVGA_IRQFLAG_FENCE_GOAL,
+ &dev_priv->goal_queue_waiters);
}
int vmw_wait_seqno(struct vmw_private *dev_priv,
@@ -303,7 +287,6 @@ void vmw_irq_preinstall(struct drm_device *dev)
if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
return;
- spin_lock_init(&dev_priv->irq_lock);
status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
}
@@ -326,30 +309,3 @@ void vmw_irq_uninstall(struct drm_device *dev)
status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
}
-
-void vmw_generic_waiter_add(struct vmw_private *dev_priv,
- u32 flag, int *waiter_count)
-{
- unsigned long irq_flags;
-
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- if ((*waiter_count)++ == 0) {
- outl(flag, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
- dev_priv->irq_mask |= flag;
- vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- }
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
-}
-
-void vmw_generic_waiter_remove(struct vmw_private *dev_priv,
- u32 flag, int *waiter_count)
-{
- unsigned long irq_flags;
-
- spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
- if (--(*waiter_count) == 0) {
- dev_priv->irq_mask &= ~flag;
- vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
- }
- spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
-}
--
2.4.3
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 4/4] drm/vmwgfx: Rework locking around register accesses
2015-10-30 9:42 [PATCH 1/4] drm/vmwgfx: Replace iowrite/ioread with volatile memory accesses Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 3/4] drm/vmwgfx: Relax irq locking somewhat Thomas Hellstrom
@ 2015-10-30 9:42 ` Thomas Hellstrom
2 siblings, 0 replies; 6+ messages in thread
From: Thomas Hellstrom @ 2015-10-30 9:42 UTC (permalink / raw)
To: dri-devel; +Cc: Thomas Hellstrom
Relax the required locking to, instead of disabling irqs, disable tasklets.
Allow the caller to perform the locking using utility functions, which
allows the caller to combine a number of register accesses within the
same critical section. Implement this for capability reads and command
buffer submission.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 8 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 +--
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 130 ++++++++++++++++++++++++++++-----
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 +-
drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 40 +++++++---
drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 20 ++---
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 2 +-
7 files changed, 163 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index 8a76821..b8d2436 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -290,18 +290,20 @@ void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header)
*/
static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header)
{
- struct vmw_cmdbuf_man *man = header->man;
+ struct vmw_private *dev_priv = header->man->dev_priv;
u32 val;
if (sizeof(header->handle) > 4)
val = (header->handle >> 32);
else
val = 0;
- vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_COMMAND_HIGH, val);
val = (header->handle & 0xFFFFFFFFULL);
val |= header->cb_context & SVGA_CB_CONTEXT_MASK;
- vmw_write(man->dev_priv, SVGA_REG_COMMAND_LOW, val);
+ __vmw_write(dev_priv, SVGA_REG_COMMAND_LOW, val);
+ vmw_hw_unlock(dev_priv, true);
return header->cb_header->status;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a09cf85..2a5496a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -631,7 +631,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
ttm_lock_init(&dev_priv->reservation_sem);
spin_lock_init(&dev_priv->hw_lock);
spin_lock_init(&dev_priv->waiter_lock);
- spin_lock_init(&dev_priv->cap_lock);
spin_lock_init(&dev_priv->svga_lock);
for (i = vmw_res_context; i < vmw_res_max; ++i) {
@@ -854,10 +853,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
}
if (dev_priv->has_mob) {
- spin_lock(&dev_priv->cap_lock);
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
- dev_priv->has_dx = !!vmw_read(dev_priv, SVGA_REG_DEV_CAP);
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
+ dev_priv->has_dx = !!__vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+ vmw_hw_unlock(dev_priv, true);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a8ae9df..f7dcbcc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -385,7 +385,6 @@ struct vmw_private {
bool has_gmr;
bool has_mob;
spinlock_t hw_lock;
- spinlock_t cap_lock;
bool has_dx;
/*
@@ -546,34 +545,127 @@ static inline struct vmw_master *vmw_master(struct drm_master *master)
return (struct vmw_master *) master->driver_priv;
}
-/*
- * The locking here is fine-grained, so that it is performed once
- * for every read- and write operation. This is of course costly, but we
- * don't perform much register access in the timing critical paths anyway.
- * Instead we have the extra benefit of being sure that we don't forget
- * the hw lock around register accesses.
+/**
+ * vmw_hw_lock - Perform necessary locking for register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @lock_bh: Disable bottom halves. Always set to true unless irqs are disabled.
*/
-static inline void vmw_write(struct vmw_private *dev_priv,
- unsigned int offset, uint32_t value)
+static inline void vmw_hw_lock(struct vmw_private *dev_priv,
+ bool lock_bh)
{
- unsigned long irq_flags;
+ if (lock_bh)
+ spin_lock_bh(&dev_priv->hw_lock);
+ else
+ spin_lock(&dev_priv->hw_lock);
+}
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+/**
+ * vmw_hw_unlock - Perform necessary unlocking after register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @unlock_bh: Disable bottom halves. Always set to true if it was set to tru
+ * in the corresponding lock.
+ */
+static inline void vmw_hw_unlock(struct vmw_private *dev_priv,
+ bool unlock_bh)
+{
+ if (unlock_bh)
+ spin_unlock_bh(&dev_priv->hw_lock);
+ else
+ spin_unlock(&dev_priv->hw_lock);
+}
+
+/**
+ * __vmw_write - Write a value to a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock must be held and that we're safe against racing
+ * against tasklets.
+ */
+static inline void __vmw_write(struct vmw_private *dev_priv,
+ unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+ lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+ WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT);
- spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
}
-static inline uint32_t vmw_read(struct vmw_private *dev_priv,
- unsigned int offset)
+/**
+ * __vmw_read - Read a value from a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock is be held and that we're safe against racing
+ * against tasklets.
+ */
+static inline uint32_t __vmw_read(struct vmw_private *dev_priv,
+ unsigned int offset)
{
- unsigned long irq_flags;
u32 val;
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+#ifdef CONFIG_LOCKDEP
+ lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+ WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT);
- spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
+
+ return val;
+}
+
+/**
+ * vmw_write - Write a value to a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable irqs.
+ */
+static inline void vmw_write(struct vmw_private *dev_priv,
+ unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(irqs_disabled());
+#endif
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, offset, value);
+ vmw_hw_unlock(dev_priv, true);
+}
+
+/**
+ * vmw_write - Read a value from a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable irqs.
+ */
+static inline uint32_t vmw_read(struct vmw_private *dev_priv,
+ unsigned int offset)
+{
+ u32 val;
+
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(irqs_disabled());
+#endif
+ vmw_hw_lock(dev_priv, true);
+ val = __vmw_read(dev_priv, offset);
+ vmw_hw_unlock(dev_priv, true);
return val;
}
@@ -722,8 +814,8 @@ extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
extern void vmw_fifo_commit_flush(struct vmw_private *dev_priv, uint32_t bytes);
extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
uint32_t *seqno);
-extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason);
-extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
+extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+ bool lock_bh);
extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index f40c36e..ac66aad 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -149,7 +149,7 @@ static bool vmw_fence_enable_signaling(struct fence *f)
if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
return false;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, false);
return true;
}
@@ -183,7 +183,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
if (likely(vmw_fence_obj_signaled(fence)))
return timeout;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
vmw_seqno_waiter_add(dev_priv);
spin_lock_bh(f->lock);
@@ -525,7 +525,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
{
struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
}
static void vmw_fence_destroy(struct vmw_fence_obj *fence)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index a8baf5f..a70cc88 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -49,10 +49,10 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
if (!dev_priv->has_mob)
return false;
- spin_lock(&dev_priv->cap_lock);
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
- result = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
+ result = __vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+ vmw_hw_unlock(dev_priv, true);
return (result != 0);
}
@@ -163,14 +163,32 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
return 0;
}
-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+/**
+ * vmw_fifo_ping_host - Notify the device that there are device commands
+ * to process
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @reason: A valid reason to ping the host. See device headers.
+ * @lock_bh: Always set to true unless irqs are disabled in which case
+ * it must be set to false.
+ *
+ * Aka the device "doorbell". The device has an optimization that allows
+ * avoiding calling the relatively expensive register write to
+ * SVGA_REG_SYNC if the SVGA_FIFO_BUSY mmio register is set to 1, meaning
+ * that the doorbell has already been called. The SVGA_FIFO_BUSY mmio register
+ * will be cleared by the device when a new write to SVGA_REG_SYNC is needed
+ * to notify the device of pending commands.
+ */
+void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+ bool lock_bh)
{
u32 *fifo_mem = dev_priv->mmio_virt;
- preempt_disable();
- if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0)
- vmw_write(dev_priv, SVGA_REG_SYNC, reason);
- preempt_enable();
+ if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) {
+ vmw_hw_lock(dev_priv, lock_bh);
+ __vmw_write(dev_priv, SVGA_REG_SYNC, reason);
+ vmw_hw_unlock(dev_priv, lock_bh);
+ }
}
void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
@@ -256,7 +274,7 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
if (likely(!vmw_fifo_is_full(dev_priv, bytes)))
return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL, true);
if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
return vmw_fifo_wait_noirq(dev_priv, bytes,
interruptible, timeout);
@@ -490,7 +508,7 @@ static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED);
mb();
up_write(&fifo_state->rwsem);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
mutex_unlock(&fifo_state->fifo_mutex);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index b8c6a03..8ce0886 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -159,14 +159,14 @@ static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce,
(pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32);
compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS;
- spin_lock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
for (i = 0; i < max_size; ++i) {
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
compat_cap->pairs[i][0] = i;
compat_cap->pairs[i][1] = vmw_mask_multisample
- (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+ (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
}
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_unlock(dev_priv, true);
return 0;
}
@@ -216,13 +216,13 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
if (num > SVGA3D_DEVCAP_MAX)
num = SVGA3D_DEVCAP_MAX;
- spin_lock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
for (i = 0; i < num; ++i) {
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
*bounce32++ = vmw_mask_multisample
- (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+ (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
}
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_unlock(dev_priv, true);
} else if (gb_objects) {
ret = vmw_fill_compat_cap(dev_priv, bounce, size);
if (unlikely(ret != 0))
@@ -420,7 +420,7 @@ unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait)
struct vmw_private *dev_priv =
vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
return drm_poll(filp, wait);
}
@@ -443,6 +443,6 @@ ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
struct vmw_private *dev_priv =
vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
return drm_read(filp, buffer, count, offset);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 12aaed7..af6970b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -246,7 +246,7 @@ int vmw_wait_seqno(struct vmw_private *dev_priv,
if (likely(vmw_seqno_passed(dev_priv, seqno)))
return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE))
return vmw_fallback_wait(dev_priv, lazy, true, seqno,
--
2.4.3
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet
2015-10-30 9:42 ` [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet Thomas Hellstrom
@ 2015-10-30 10:23 ` Daniel Vetter
2015-10-30 11:08 ` Thomas Hellstrom
0 siblings, 1 reply; 6+ messages in thread
From: Daniel Vetter @ 2015-10-30 10:23 UTC (permalink / raw)
To: Thomas Hellstrom; +Cc: dri-devel
On Fri, Oct 30, 2015 at 02:42:44AM -0700, Thomas Hellstrom wrote:
> Reduce the time in hardware irq context and hardware irq latency.
>
> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
> Reviewed-by: Sinclair Yeh <syeh@vmware.com>
> ---
> drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 108 ++++++++++++++++++++--------------
> drivers/gpu/drm/vmwgfx/vmwgfx_fence.h | 2 +
> drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 6 +-
> 3 files changed, 68 insertions(+), 48 deletions(-)
>
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> index 8e689b4..f40c36e 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
> @@ -47,6 +47,7 @@ struct vmw_fence_manager {
> bool seqno_valid; /* Protected by @lock, and may not be set to true
> without the @goal_irq_mutex held. */
> unsigned ctx;
> + struct tasklet_struct tasklet;
Bottom halves are super-deprecated except for giant existing users like
networking. I think the recommended way to do this is to either use
threaded interrupts or work-queues. The reason for that seems to be that
locking is funky around them, which is a major pain for RT. And RT is
going mainline now for real.
-Daniel
> };
>
> struct vmw_user_fence {
> @@ -81,6 +82,8 @@ struct vmw_event_fence_action {
> uint32_t *tv_usec;
> };
>
> +static void vmw_fence_tasklet(unsigned long data);
> +
> static struct vmw_fence_manager *
> fman_from_fence(struct vmw_fence_obj *fence)
> {
> @@ -115,12 +118,11 @@ static void vmw_fence_obj_destroy(struct fence *f)
> container_of(f, struct vmw_fence_obj, base);
>
> struct vmw_fence_manager *fman = fman_from_fence(fence);
> - unsigned long irq_flags;
>
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> list_del_init(&fence->head);
> --fman->num_fence_objects;
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
> fence->destroy(fence);
> }
>
> @@ -177,7 +179,6 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
> struct vmw_private *dev_priv = fman->dev_priv;
> struct vmwgfx_wait_cb cb;
> long ret = timeout;
> - unsigned long irq_flags;
>
> if (likely(vmw_fence_obj_signaled(fence)))
> return timeout;
> @@ -185,7 +186,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
> vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
> vmw_seqno_waiter_add(dev_priv);
>
> - spin_lock_irqsave(f->lock, irq_flags);
> + spin_lock_bh(f->lock);
>
> if (intr && signal_pending(current)) {
> ret = -ERESTARTSYS;
> @@ -205,11 +206,11 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
> __set_current_state(TASK_INTERRUPTIBLE);
> else
> __set_current_state(TASK_UNINTERRUPTIBLE);
> - spin_unlock_irqrestore(f->lock, irq_flags);
> + spin_unlock_bh(f->lock);
>
> ret = schedule_timeout(ret);
>
> - spin_lock_irqsave(f->lock, irq_flags);
> + spin_lock_bh(f->lock);
> if (ret > 0 && intr && signal_pending(current))
> ret = -ERESTARTSYS;
> }
> @@ -219,7 +220,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
> __set_current_state(TASK_RUNNING);
>
> out:
> - spin_unlock_irqrestore(f->lock, irq_flags);
> + spin_unlock_bh(f->lock);
>
> vmw_seqno_waiter_remove(dev_priv);
>
> @@ -300,21 +301,22 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
> ttm_round_pot(sizeof(struct vmw_event_fence_action));
> mutex_init(&fman->goal_irq_mutex);
> fman->ctx = fence_context_alloc(1);
> + tasklet_init(&fman->tasklet, vmw_fence_tasklet,
> + (unsigned long) fman);
>
> return fman;
> }
>
> void vmw_fence_manager_takedown(struct vmw_fence_manager *fman)
> {
> - unsigned long irq_flags;
> bool lists_empty;
>
> (void) cancel_work_sync(&fman->work);
>
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> lists_empty = list_empty(&fman->fence_list) &&
> list_empty(&fman->cleanup_list);
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
>
> BUG_ON(!lists_empty);
> kfree(fman);
> @@ -324,7 +326,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
> struct vmw_fence_obj *fence, u32 seqno,
> void (*destroy) (struct vmw_fence_obj *fence))
> {
> - unsigned long irq_flags;
> int ret = 0;
>
> fence_init(&fence->base, &vmw_fence_ops, &fman->lock,
> @@ -332,7 +333,7 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
> INIT_LIST_HEAD(&fence->seq_passed_actions);
> fence->destroy = destroy;
>
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> if (unlikely(fman->fifo_down)) {
> ret = -EBUSY;
> goto out_unlock;
> @@ -341,7 +342,7 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
> ++fman->num_fence_objects;
>
> out_unlock:
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
> return ret;
>
> }
> @@ -490,11 +491,9 @@ rerun:
>
> void vmw_fences_update(struct vmw_fence_manager *fman)
> {
> - unsigned long irq_flags;
> -
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> __vmw_fences_update(fman);
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
> }
>
> bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence)
> @@ -694,11 +693,9 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
>
> void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
> {
> - unsigned long irq_flags;
> -
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> fman->fifo_down = false;
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
> }
>
>
> @@ -825,10 +822,9 @@ void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman,
> {
> struct vmw_event_fence_action *eaction;
> struct drm_pending_event *event;
> - unsigned long irq_flags;
>
> while (1) {
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> if (list_empty(event_list))
> goto out_unlock;
> eaction = list_first_entry(event_list,
> @@ -837,11 +833,11 @@ void vmw_event_fence_fpriv_gone(struct vmw_fence_manager *fman,
> list_del_init(&eaction->fpriv_head);
> event = eaction->event;
> eaction->event = NULL;
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
> event->destroy(event);
> }
> out_unlock:
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
> }
>
>
> @@ -854,7 +850,7 @@ out_unlock:
> * This function is called when the seqno of the fence where @action is
> * attached has passed. It queues the event on the submitter's event list.
> * This function is always called from atomic context, and may be called
> - * from irq context.
> + * from tasklet context.
> */
> static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
> {
> @@ -863,13 +859,12 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
> struct drm_device *dev = eaction->dev;
> struct drm_pending_event *event = eaction->event;
> struct drm_file *file_priv;
> - unsigned long irq_flags;
>
> if (unlikely(event == NULL))
> return;
>
> file_priv = event->file_priv;
> - spin_lock_irqsave(&dev->event_lock, irq_flags);
> + spin_lock_bh(&dev->event_lock);
>
> if (likely(eaction->tv_sec != NULL)) {
> struct timeval tv;
> @@ -883,7 +878,7 @@ static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
> list_add_tail(&eaction->event->link, &file_priv->event_list);
> eaction->event = NULL;
> wake_up_all(&file_priv->event_wait);
> - spin_unlock_irqrestore(&dev->event_lock, irq_flags);
> + spin_unlock_bh(&dev->event_lock);
> }
>
> /**
> @@ -900,11 +895,10 @@ static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
> struct vmw_event_fence_action *eaction =
> container_of(action, struct vmw_event_fence_action, action);
> struct vmw_fence_manager *fman = fman_from_fence(eaction->fence);
> - unsigned long irq_flags;
>
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> list_del(&eaction->fpriv_head);
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
>
> vmw_fence_obj_unreference(&eaction->fence);
> kfree(eaction);
> @@ -924,11 +918,10 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
> struct vmw_fence_action *action)
> {
> struct vmw_fence_manager *fman = fman_from_fence(fence);
> - unsigned long irq_flags;
> bool run_update = false;
>
> mutex_lock(&fman->goal_irq_mutex);
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
>
> fman->pending_actions[action->type]++;
> if (fence_is_signaled_locked(&fence->base)) {
> @@ -947,7 +940,7 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
> run_update = vmw_fence_goal_check_locked(fence);
> }
>
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
>
> if (run_update) {
> if (!fman->goal_irq_on) {
> @@ -985,7 +978,6 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv,
> struct vmw_event_fence_action *eaction;
> struct vmw_fence_manager *fman = fman_from_fence(fence);
> struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
> - unsigned long irq_flags;
>
> eaction = kzalloc(sizeof(*eaction), GFP_KERNEL);
> if (unlikely(eaction == NULL))
> @@ -1002,9 +994,9 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv,
> eaction->tv_sec = tv_sec;
> eaction->tv_usec = tv_usec;
>
> - spin_lock_irqsave(&fman->lock, irq_flags);
> + spin_lock_bh(&fman->lock);
> list_add_tail(&eaction->fpriv_head, &vmw_fp->fence_events);
> - spin_unlock_irqrestore(&fman->lock, irq_flags);
> + spin_unlock_bh(&fman->lock);
>
> vmw_fence_obj_add_action(fence, &eaction->action);
>
> @@ -1025,16 +1017,15 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv,
> struct vmw_event_fence_pending *event;
> struct vmw_fence_manager *fman = fman_from_fence(fence);
> struct drm_device *dev = fman->dev_priv->dev;
> - unsigned long irq_flags;
> int ret;
>
> - spin_lock_irqsave(&dev->event_lock, irq_flags);
> + spin_lock_bh(&dev->event_lock);
>
> ret = (file_priv->event_space < sizeof(event->event)) ? -EBUSY : 0;
> if (likely(ret == 0))
> file_priv->event_space -= sizeof(event->event);
>
> - spin_unlock_irqrestore(&dev->event_lock, irq_flags);
> + spin_unlock_bh(&dev->event_lock);
>
> if (unlikely(ret != 0)) {
> DRM_ERROR("Failed to allocate event space for this file.\n");
> @@ -1078,9 +1069,9 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv,
> out_no_queue:
> event->base.destroy(&event->base);
> out_no_event:
> - spin_lock_irqsave(&dev->event_lock, irq_flags);
> + spin_lock_bh(&dev->event_lock);
> file_priv->event_space += sizeof(*event);
> - spin_unlock_irqrestore(&dev->event_lock, irq_flags);
> + spin_unlock_bh(&dev->event_lock);
> out_no_space:
> return ret;
> }
> @@ -1172,3 +1163,32 @@ out_no_ref_obj:
> vmw_fence_obj_unreference(&fence);
> return ret;
> }
> +
> +/**
> + * vmw_fence_tasklet - Fence manager tasklet entry point
> + *
> + * @data: The tasklet closure - A pointer to the fence manager cast to an
> + * unsigned long.
> + */
> +static void vmw_fence_tasklet(unsigned long data)
> +{
> + struct vmw_fence_manager *fman = (struct vmw_fence_manager *) data;
> +
> + spin_lock(&fman->lock);
> + __vmw_fences_update(fman);
> + spin_unlock(&fman->lock);
> + wake_up_all(&fman->dev_priv->fence_queue);
> +}
> +
> +/**
> + * vmw_fence_tasklet_schedule - Schedule a fence manager tasklet run
> + *
> + * @fman: Pointer to a fence manager
> + */
> +void vmw_fence_tasklet_schedule(struct vmw_fence_manager *fman)
> +{
> + if (!fman)
> + return;
> +
> + tasklet_schedule(&fman->tasklet);
> +}
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
> index 8be6c29..e55b2c9 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
> @@ -124,4 +124,6 @@ extern int vmw_event_fence_action_queue(struct drm_file *filee_priv,
> uint32_t *tv_sec,
> uint32_t *tv_usec,
> bool interruptible);
> +extern void vmw_fence_tasklet_schedule(struct vmw_fence_manager *fman);
> +
> #endif /* _VMWGFX_FENCE_H_ */
> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
> index ac3eccd..b0a6e65 100644
> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
> @@ -48,10 +48,8 @@ irqreturn_t vmw_irq_handler(int irq, void *arg)
> return IRQ_NONE;
>
> if (masked_status & (SVGA_IRQFLAG_ANY_FENCE |
> - SVGA_IRQFLAG_FENCE_GOAL)) {
> - vmw_fences_update(dev_priv->fman);
> - wake_up_all(&dev_priv->fence_queue);
> - }
> + SVGA_IRQFLAG_FENCE_GOAL))
> + vmw_fence_tasklet_schedule(dev_priv->fman);
>
> if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS)
> wake_up_all(&dev_priv->fifo_queue);
> --
> 2.4.3
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet
2015-10-30 10:23 ` Daniel Vetter
@ 2015-10-30 11:08 ` Thomas Hellstrom
0 siblings, 0 replies; 6+ messages in thread
From: Thomas Hellstrom @ 2015-10-30 11:08 UTC (permalink / raw)
To: Daniel Vetter; +Cc: dri-devel
On 10/30/2015 11:23 AM, Daniel Vetter wrote:
> On Fri, Oct 30, 2015 at 02:42:44AM -0700, Thomas Hellstrom wrote:
>> Reduce the time in hardware irq context and hardware irq latency.
>>
>> Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
>> Reviewed-by: Sinclair Yeh <syeh@vmware.com>
>> ---
>> drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 108 ++++++++++++++++++++--------------
>> drivers/gpu/drm/vmwgfx/vmwgfx_fence.h | 2 +
>> drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 6 +-
>> 3 files changed, 68 insertions(+), 48 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
>> index 8e689b4..f40c36e 100644
>> --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
>> +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
>> @@ -47,6 +47,7 @@ struct vmw_fence_manager {
>> bool seqno_valid; /* Protected by @lock, and may not be set to true
>> without the @goal_irq_mutex held. */
>> unsigned ctx;
>> + struct tasklet_struct tasklet;
> Bottom halves are super-deprecated except for giant existing users like
> networking. I think the recommended way to do this is to either use
> threaded interrupts or work-queues. The reason for that seems to be that
> locking is funky around them, which is a major pain for RT. And RT is
> going mainline now for real.
> -Daniel
>
>
Thanks for the heads up. Unfortunately work-queues introduce too much
latency for this use-case.
Given that we (vmwgfx) already is an existing user (albeit not giant
:)), and RT in a VM guest is somewhat unlikely
I wonder how much of an issue this is.
I'll read up on threaded interrupts.
/Thomas
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2015-10-30 11:08 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-10-30 9:42 [PATCH 1/4] drm/vmwgfx: Replace iowrite/ioread with volatile memory accesses Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet Thomas Hellstrom
2015-10-30 10:23 ` Daniel Vetter
2015-10-30 11:08 ` Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 3/4] drm/vmwgfx: Relax irq locking somewhat Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 4/4] drm/vmwgfx: Rework locking around register accesses Thomas Hellstrom
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.