From: Thomas Hellstrom <thellstrom@vmware.com>
To: dri-devel@lists.freedesktop.org
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Subject: [PATCH 4/4] drm/vmwgfx: Rework locking around register accesses
Date: Fri, 30 Oct 2015 02:42:46 -0700 [thread overview]
Message-ID: <1446198166-3068-4-git-send-email-thellstrom@vmware.com> (raw)
In-Reply-To: <1446198166-3068-1-git-send-email-thellstrom@vmware.com>
Relax the required locking to, instead of disabling irqs, disable tasklets.
Allow the caller to perform the locking using utility functions, which
allows the caller to combine a number of register accesses within the
same critical section. Implement this for capability reads and command
buffer submission.
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Sinclair Yeh <syeh@vmware.com>
---
drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c | 8 +-
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 9 +--
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 130 ++++++++++++++++++++++++++++-----
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c | 6 +-
drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c | 40 +++++++---
drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c | 20 ++---
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c | 2 +-
7 files changed, 163 insertions(+), 52 deletions(-)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index 8a76821..b8d2436 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -290,18 +290,20 @@ void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header *header)
*/
static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header)
{
- struct vmw_cmdbuf_man *man = header->man;
+ struct vmw_private *dev_priv = header->man->dev_priv;
u32 val;
if (sizeof(header->handle) > 4)
val = (header->handle >> 32);
else
val = 0;
- vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_COMMAND_HIGH, val);
val = (header->handle & 0xFFFFFFFFULL);
val |= header->cb_context & SVGA_CB_CONTEXT_MASK;
- vmw_write(man->dev_priv, SVGA_REG_COMMAND_LOW, val);
+ __vmw_write(dev_priv, SVGA_REG_COMMAND_LOW, val);
+ vmw_hw_unlock(dev_priv, true);
return header->cb_header->status;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a09cf85..2a5496a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -631,7 +631,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
ttm_lock_init(&dev_priv->reservation_sem);
spin_lock_init(&dev_priv->hw_lock);
spin_lock_init(&dev_priv->waiter_lock);
- spin_lock_init(&dev_priv->cap_lock);
spin_lock_init(&dev_priv->svga_lock);
for (i = vmw_res_context; i < vmw_res_max; ++i) {
@@ -854,10 +853,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
}
if (dev_priv->has_mob) {
- spin_lock(&dev_priv->cap_lock);
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
- dev_priv->has_dx = !!vmw_read(dev_priv, SVGA_REG_DEV_CAP);
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
+ dev_priv->has_dx = !!__vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+ vmw_hw_unlock(dev_priv, true);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a8ae9df..f7dcbcc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -385,7 +385,6 @@ struct vmw_private {
bool has_gmr;
bool has_mob;
spinlock_t hw_lock;
- spinlock_t cap_lock;
bool has_dx;
/*
@@ -546,34 +545,127 @@ static inline struct vmw_master *vmw_master(struct drm_master *master)
return (struct vmw_master *) master->driver_priv;
}
-/*
- * The locking here is fine-grained, so that it is performed once
- * for every read- and write operation. This is of course costly, but we
- * don't perform much register access in the timing critical paths anyway.
- * Instead we have the extra benefit of being sure that we don't forget
- * the hw lock around register accesses.
+/**
+ * vmw_hw_lock - Perform necessary locking for register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @lock_bh: Disable bottom halves. Always set to true unless irqs are disabled.
*/
-static inline void vmw_write(struct vmw_private *dev_priv,
- unsigned int offset, uint32_t value)
+static inline void vmw_hw_lock(struct vmw_private *dev_priv,
+ bool lock_bh)
{
- unsigned long irq_flags;
+ if (lock_bh)
+ spin_lock_bh(&dev_priv->hw_lock);
+ else
+ spin_lock(&dev_priv->hw_lock);
+}
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+/**
+ * vmw_hw_unlock - Perform necessary unlocking after register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @unlock_bh: Disable bottom halves. Always set to true if it was set to tru
+ * in the corresponding lock.
+ */
+static inline void vmw_hw_unlock(struct vmw_private *dev_priv,
+ bool unlock_bh)
+{
+ if (unlock_bh)
+ spin_unlock_bh(&dev_priv->hw_lock);
+ else
+ spin_unlock(&dev_priv->hw_lock);
+}
+
+/**
+ * __vmw_write - Write a value to a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock must be held and that we're safe against racing
+ * against tasklets.
+ */
+static inline void __vmw_write(struct vmw_private *dev_priv,
+ unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+ lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+ WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT);
- spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
}
-static inline uint32_t vmw_read(struct vmw_private *dev_priv,
- unsigned int offset)
+/**
+ * __vmw_read - Read a value from a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock is be held and that we're safe against racing
+ * against tasklets.
+ */
+static inline uint32_t __vmw_read(struct vmw_private *dev_priv,
+ unsigned int offset)
{
- unsigned long irq_flags;
u32 val;
- spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+#ifdef CONFIG_LOCKDEP
+ lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+ WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT);
- spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
+
+ return val;
+}
+
+/**
+ * vmw_write - Write a value to a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable irqs.
+ */
+static inline void vmw_write(struct vmw_private *dev_priv,
+ unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(irqs_disabled());
+#endif
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, offset, value);
+ vmw_hw_unlock(dev_priv, true);
+}
+
+/**
+ * vmw_write - Read a value from a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable irqs.
+ */
+static inline uint32_t vmw_read(struct vmw_private *dev_priv,
+ unsigned int offset)
+{
+ u32 val;
+
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(irqs_disabled());
+#endif
+ vmw_hw_lock(dev_priv, true);
+ val = __vmw_read(dev_priv, offset);
+ vmw_hw_unlock(dev_priv, true);
return val;
}
@@ -722,8 +814,8 @@ extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
extern void vmw_fifo_commit_flush(struct vmw_private *dev_priv, uint32_t bytes);
extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
uint32_t *seqno);
-extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason);
-extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
+extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+ bool lock_bh);
extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index f40c36e..ac66aad 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -149,7 +149,7 @@ static bool vmw_fence_enable_signaling(struct fence *f)
if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
return false;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, false);
return true;
}
@@ -183,7 +183,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
if (likely(vmw_fence_obj_signaled(fence)))
return timeout;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
vmw_seqno_waiter_add(dev_priv);
spin_lock_bh(f->lock);
@@ -525,7 +525,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
{
struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
}
static void vmw_fence_destroy(struct vmw_fence_obj *fence)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index a8baf5f..a70cc88 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -49,10 +49,10 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
if (!dev_priv->has_mob)
return false;
- spin_lock(&dev_priv->cap_lock);
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
- result = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
+ result = __vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+ vmw_hw_unlock(dev_priv, true);
return (result != 0);
}
@@ -163,14 +163,32 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
return 0;
}
-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+/**
+ * vmw_fifo_ping_host - Notify the device that there are device commands
+ * to process
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @reason: A valid reason to ping the host. See device headers.
+ * @lock_bh: Always set to true unless irqs are disabled in which case
+ * it must be set to false.
+ *
+ * Aka the device "doorbell". The device has an optimization that allows
+ * avoiding calling the relatively expensive register write to
+ * SVGA_REG_SYNC if the SVGA_FIFO_BUSY mmio register is set to 1, meaning
+ * that the doorbell has already been called. The SVGA_FIFO_BUSY mmio register
+ * will be cleared by the device when a new write to SVGA_REG_SYNC is needed
+ * to notify the device of pending commands.
+ */
+void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+ bool lock_bh)
{
u32 *fifo_mem = dev_priv->mmio_virt;
- preempt_disable();
- if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0)
- vmw_write(dev_priv, SVGA_REG_SYNC, reason);
- preempt_enable();
+ if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) {
+ vmw_hw_lock(dev_priv, lock_bh);
+ __vmw_write(dev_priv, SVGA_REG_SYNC, reason);
+ vmw_hw_unlock(dev_priv, lock_bh);
+ }
}
void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
@@ -256,7 +274,7 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
if (likely(!vmw_fifo_is_full(dev_priv, bytes)))
return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL, true);
if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
return vmw_fifo_wait_noirq(dev_priv, bytes,
interruptible, timeout);
@@ -490,7 +508,7 @@ static void vmw_local_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED);
mb();
up_write(&fifo_state->rwsem);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
mutex_unlock(&fifo_state->fifo_mutex);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index b8c6a03..8ce0886 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -159,14 +159,14 @@ static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce,
(pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32);
compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS;
- spin_lock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
for (i = 0; i < max_size; ++i) {
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
compat_cap->pairs[i][0] = i;
compat_cap->pairs[i][1] = vmw_mask_multisample
- (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+ (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
}
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_unlock(dev_priv, true);
return 0;
}
@@ -216,13 +216,13 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
if (num > SVGA3D_DEVCAP_MAX)
num = SVGA3D_DEVCAP_MAX;
- spin_lock(&dev_priv->cap_lock);
+ vmw_hw_lock(dev_priv, true);
for (i = 0; i < num; ++i) {
- vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+ __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
*bounce32++ = vmw_mask_multisample
- (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+ (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
}
- spin_unlock(&dev_priv->cap_lock);
+ vmw_hw_unlock(dev_priv, true);
} else if (gb_objects) {
ret = vmw_fill_compat_cap(dev_priv, bounce, size);
if (unlikely(ret != 0))
@@ -420,7 +420,7 @@ unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait)
struct vmw_private *dev_priv =
vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
return drm_poll(filp, wait);
}
@@ -443,6 +443,6 @@ ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
struct vmw_private *dev_priv =
vmw_priv(file_priv->minor->dev);
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
return drm_read(filp, buffer, count, offset);
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 12aaed7..af6970b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -246,7 +246,7 @@ int vmw_wait_seqno(struct vmw_private *dev_priv,
if (likely(vmw_seqno_passed(dev_priv, seqno)))
return 0;
- vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+ vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE))
return vmw_fallback_wait(dev_priv, lazy, true, seqno,
--
2.4.3
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
prev parent reply other threads:[~2015-10-30 9:43 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-30 9:42 [PATCH 1/4] drm/vmwgfx: Replace iowrite/ioread with volatile memory accesses Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 2/4] drm/vmwgfx: Defer fence irq processing to a tasklet Thomas Hellstrom
2015-10-30 10:23 ` Daniel Vetter
2015-10-30 11:08 ` Thomas Hellstrom
2015-10-30 9:42 ` [PATCH 3/4] drm/vmwgfx: Relax irq locking somewhat Thomas Hellstrom
2015-10-30 9:42 ` Thomas Hellstrom [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1446198166-3068-4-git-send-email-thellstrom@vmware.com \
--to=thellstrom@vmware.com \
--cc=dri-devel@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.