All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing.
@ 2010-03-10 16:20 Pauli Nieminen
  2010-03-10 16:20 ` [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping Pauli Nieminen
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Pauli Nieminen @ 2010-03-10 16:20 UTC (permalink / raw)
  To: dri-devel; +Cc: intel-gfx

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 6414 bytes --]

intel_atomic.h includes very usefull atomic operations for
lock free parrallel access of variables. Moving these to
core libdrm for code sharing with radeon.

Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
---
 Makefile.am          |    2 +-
 configure.ac         |    2 +-
 intel/intel_atomic.h |   56 +-----------------------------
 xf86atomic.h         |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 96 insertions(+), 57 deletions(-)
 create mode 100644 xf86atomic.h

diff --git a/Makefile.am b/Makefile.am
index ee3ccc7..295121f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -59,7 +59,7 @@ libdrm_la_SOURCES =				\
 	libdrm_lists.h
 
 libdrmincludedir = ${includedir}
-libdrminclude_HEADERS = xf86drm.h xf86drmMode.h
+libdrminclude_HEADERS = xf86drm.h xf86drmMode.h xf86atomic.h
 
 EXTRA_DIST = libdrm.pc.in include/drm/*
 
diff --git a/configure.ac b/configure.ac
index aaa8efa..953a758 100644
--- a/configure.ac
+++ b/configure.ac
@@ -198,7 +198,7 @@ if test "x$INTEL" != "xno"; then
 
     ])
     if test "x$drm_cv_atomic_primitives" = xIntel; then
-	    AC_DEFINE(HAVE_INTEL_ATOMIC_PRIMITIVES, 1,
+	    AC_DEFINE(HAVE_LIBDRM_ATOMIC_PRIMITIVES, 1,
 		      [Enable if your compiler supports the Intel __sync_* atomic primitives])
     fi
     if test "x$drm_cv_atomic_primitives" = "xlibatomic-ops"; then
diff --git a/intel/intel_atomic.h b/intel/intel_atomic.h
index 12bb96b..dcb4ec8 100644
--- a/intel/intel_atomic.h
+++ b/intel/intel_atomic.h
@@ -34,60 +34,6 @@
 #ifndef INTEL_ATOMICS_H
 #define INTEL_ATOMICS_H
 
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#if HAVE_INTEL_ATOMIC_PRIMITIVES
-
-#define HAS_ATOMIC_OPS 1
-
-typedef struct {
-	int atomic;
-} atomic_t;
-
-# define atomic_read(x) ((x)->atomic)
-# define atomic_set(x, val) ((x)->atomic = (val))
-# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
-# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
-# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
-
-#endif
-
-#if HAVE_LIB_ATOMIC_OPS
-#include <atomic_ops.h>
-
-#define HAS_ATOMIC_OPS 1
-
-typedef struct {
-	AO_t atomic;
-} atomic_t;
-
-# define atomic_read(x) AO_load_full(&(x)->atomic)
-# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
-# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
-# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
-# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv)
-
-#endif
-
-#if defined(__sun) && !defined(HAS_ATOMIC_OPS)  /* Solaris & OpenSolaris */
-
-#include <sys/atomic.h>
-#define HAS_ATOMIC_OPS 1
-
-typedef struct { uint_t atomic; } atomic_t;
-
-# define atomic_read(x) (int) ((x)->atomic)
-# define atomic_set(x, val) ((x)->atomic = (uint_t)(val))
-# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
-# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1)
-# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv)
-
-#endif
-
-#if ! HAS_ATOMIC_OPS
-#error libdrm-intel requires atomic operations, please define them for your CPU/compiler.
-#endif
+#include <xf86atomic.h>
 
 #endif
diff --git a/xf86atomic.h b/xf86atomic.h
new file mode 100644
index 0000000..627dcf2
--- /dev/null
+++ b/xf86atomic.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+/**
+ * @file intel_atomics.h
+ *
+ * Private definitions for atomic operations
+ */
+
+#ifndef LIBDRM_ATOMICS_H
+#define LIBDRM_ATOMICS_H
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if HAVE_LIBDRM_ATOMIC_PRIMITIVES
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+	int atomic;
+} atomic_t;
+
+# define atomic_read(x) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (val))
+# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
+# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
+# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if HAVE_LIB_ATOMIC_OPS
+#include <atomic_ops.h>
+
+#define HAS_ATOMIC_OPS 1
+
+typedef struct {
+	AO_t atomic;
+} atomic_t;
+
+# define atomic_read(x) AO_load_full(&(x)->atomic)
+# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
+# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
+# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
+# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv)
+
+#endif
+
+#if defined(__sun) && !defined(HAS_ATOMIC_OPS)  /* Solaris & OpenSolaris */
+
+#include <sys/atomic.h>
+#define HAS_ATOMIC_OPS 1
+
+typedef struct { uint_t atomic; } atomic_t;
+
+# define atomic_read(x) (int) ((x)->atomic)
+# define atomic_set(x, val) ((x)->atomic = (uint_t)(val))
+# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
+# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1)
+# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv)
+
+#endif
+
+#if ! HAS_ATOMIC_OPS
+#error libdrm-intel requires atomic operations, please define them for your CPU/compiler.
+#endif
+
+#endif
-- 
1.6.3.3



[-- Attachment #2: Type: text/plain, Size: 345 bytes --]

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev

[-- Attachment #3: Type: text/plain, Size: 161 bytes --]

--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping.
  2010-03-10 16:20 [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Pauli Nieminen
@ 2010-03-10 16:20 ` Pauli Nieminen
  2010-03-10 17:18   ` Michel Dänzer
  2010-03-10 18:11 ` [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Matt Turner
  2010-03-12 23:36 ` Eric Anholt
  2 siblings, 1 reply; 9+ messages in thread
From: Pauli Nieminen @ 2010-03-10 16:20 UTC (permalink / raw)
  To: dri-devel; +Cc: intel-gfx

Bit has table will be first checked from BO if we can quarentee this BO is not
in this cs already.

To quarentee that there is no other cs with same id number of CS that can have
id is limited to 32. Adding and remocing reference in bo is done with atomic
operations to allow parallel access to a bo from multiple contexts.

This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
to 2.6%.

Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
---
 radeon/radeon_bo_gem.c |    1 +
 radeon/radeon_cs.c     |    6 ++
 radeon/radeon_cs.h     |    2 +-
 radeon/radeon_cs_gem.c |  133 ++++++++++++++++++++++++++++++++++++-----------
 radeon/radeon_cs_int.h |    1 +
 5 files changed, 111 insertions(+), 32 deletions(-)

diff --git a/radeon/radeon_bo_gem.c b/radeon/radeon_bo_gem.c
index bc8058d..1b33bdb 100644
--- a/radeon/radeon_bo_gem.c
+++ b/radeon/radeon_bo_gem.c
@@ -80,6 +80,7 @@ static struct radeon_bo *bo_open(struct radeon_bo_manager *bom,
     bo->base.domains = domains;
     bo->base.flags = flags;
     bo->base.ptr = NULL;
+    bo->base.referenced_in_cs = 0;
     bo->map_count = 0;
     if (handle) {
         struct drm_gem_open open_arg;
diff --git a/radeon/radeon_cs.c b/radeon/radeon_cs.c
index cc9be39..d0e922b 100644
--- a/radeon/radeon_cs.c
+++ b/radeon/radeon_cs.c
@@ -88,3 +88,9 @@ void radeon_cs_space_set_flush(struct radeon_cs *cs, void (*fn)(void *), void *d
     csi->space_flush_fn = fn;
     csi->space_flush_data = data;
 }
+
+uint32_t radeon_cs_get_id(struct radeon_cs *cs)
+{
+    struct radeon_cs_int *csi = (struct radeon_cs_int *)cs;
+    return csi->id;
+}
diff --git a/radeon/radeon_cs.h b/radeon/radeon_cs.h
index 49d5d9a..7f6ee68 100644
--- a/radeon/radeon_cs.h
+++ b/radeon/radeon_cs.h
@@ -85,7 +85,7 @@ extern int radeon_cs_write_reloc(struct radeon_cs *cs,
                                  uint32_t read_domain,
                                  uint32_t write_domain,
                                  uint32_t flags);
-
+extern uint32_t radeon_cs_get_id(struct radeon_cs *cs);
 /*
  * add a persistent BO to the list
  * a persistent BO is one that will be referenced across flushes,
diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c
index 45a219c..83aabea 100644
--- a/radeon/radeon_cs_gem.c
+++ b/radeon/radeon_cs_gem.c
@@ -32,6 +32,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <stdlib.h>
+#include <pthread.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include "radeon_cs.h"
@@ -68,6 +69,66 @@ struct cs_gem {
     struct radeon_bo_int        **relocs_bo;
 };
 
+
+#if !defined(__GNUC__) || __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
+/* no built in sync support in compiler define place holders */
+uint32_t __sync_add_and_fetch(uint32_t *a, uint32_t val)
+{
+	*a += val;
+	return val;
+}
+
+uint32_t __sync_add_and_fetch(uint32_t *a, uint32_t val)
+{
+	*a -= val;
+	return val;
+}
+#endif
+
+static pthread_mutex_t id_mutex = PTHREAD_MUTEX_INITIALIZER;
+static uint32_t cs_id_source = 0;
+
+/**
+ * result is undefined if called with ~0
+ */
+static uint32_t get_first_zero(const uint32_t n)
+{
+    /* __builtin_ctz returns number of trailing zeros. */
+    return 1 << __builtin_ctz(~n);
+}
+
+/**
+ * Returns a free id for cs.
+ * If there is no free id we return zero
+ **/
+static uint32_t generate_id(void)
+{
+    uint32_t r = 0;
+    pthread_mutex_lock( &id_mutex );
+    /* check for free ids */
+    if (cs_id_source != ~r) {
+        /* find first zero bit */
+        r = get_first_zero(cs_id_source);
+
+        /* set id as reserved */
+        cs_id_source |= r;
+    }
+    pthread_mutex_unlock( &id_mutex );
+    return r;
+}
+
+/**
+ * Free the id for later reuse
+ **/
+static void free_id(uint32_t id)
+{
+    pthread_mutex_lock( &id_mutex );
+
+    cs_id_source &= ~id;
+
+    pthread_mutex_unlock( &id_mutex );
+}
+
 static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
                                        uint32_t ndw)
 {
@@ -90,6 +151,7 @@ static struct radeon_cs_int *cs_gem_create(struct radeon_cs_manager *csm,
     }
     csg->base.relocs_total_size = 0;
     csg->base.crelocs = 0;
+    csg->base.id = generate_id();
     csg->nrelocs = 4096 / (4 * 4) ;
     csg->relocs_bo = (struct radeon_bo_int**)calloc(1,
                                                 csg->nrelocs*sizeof(void*));
@@ -141,38 +203,43 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
     if (write_domain == RADEON_GEM_DOMAIN_CPU) {
         return -EINVAL;
     }
-    /* check if bo is already referenced */
-    for(i = 0; i < cs->crelocs; i++) {
-        idx = i * RELOC_SIZE;
-        reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
-        if (reloc->handle == bo->handle) {
-            /* Check domains must be in read or write. As we check already
-             * checked that in argument one of the read or write domain was
-             * set we only need to check that if previous reloc as the read
-             * domain set then the read_domain should also be set for this
-             * new relocation.
-             */
-            /* the DDX expects to read and write from same pixmap */
-            if (write_domain && (reloc->read_domain & write_domain)) {
-                reloc->read_domain = 0;
-                reloc->write_domain = write_domain;
-            } else if (read_domain & reloc->write_domain) {
-                reloc->read_domain = 0;
-            } else {
-                if (write_domain != reloc->write_domain)
-                    return -EINVAL;
-                if (read_domain != reloc->read_domain)
-                    return -EINVAL;
+    /* use bit field hash functionto determine
+       if this bo is for sure not in this cs.*/
+    if ((boi->referenced_in_cs & cs->id)) {
+        /* check if bo is already referenced */
+        for(i = cs->crelocs; i != 0;) {
+            --i;
+            idx = i * RELOC_SIZE;
+            reloc = (struct cs_reloc_gem*)&csg->relocs[idx];
+            if (reloc->handle == bo->handle) {
+                /* Check domains must be in read or write. As we check already
+                 * checked that in argument one of the read or write domain was
+                 * set we only need to check that if previous reloc as the read
+                 * domain set then the read_domain should also be set for this
+                 * new relocation.
+                 */
+                /* the DDX expects to read and write from same pixmap */
+                if (write_domain && (reloc->read_domain & write_domain)) {
+                    reloc->read_domain = 0;
+                    reloc->write_domain = write_domain;
+                } else if (read_domain & reloc->write_domain) {
+                    reloc->read_domain = 0;
+                } else {
+                    if (write_domain != reloc->write_domain)
+                        return -EINVAL;
+                    if (read_domain != reloc->read_domain)
+                        return -EINVAL;
+                }
+
+                reloc->read_domain |= read_domain;
+                reloc->write_domain |= write_domain;
+                /* update flags */
+                reloc->flags |= (flags & reloc->flags);
+                /* write relocation packet */
+                radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
+                radeon_cs_write_dword((struct radeon_cs *)cs, idx);
+                return 0;
             }
-
-            reloc->read_domain |= read_domain;
-            reloc->write_domain |= write_domain;
-            /* update flags */
-            reloc->flags |= (flags & reloc->flags);
-            /* write relocation packet */
-            radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
-            radeon_cs_write_dword((struct radeon_cs *)cs, idx);
-            return 0;
         }
     }
     /* new relocation */
@@ -203,6 +270,7 @@ static int cs_gem_write_reloc(struct radeon_cs_int *cs,
     reloc->flags = flags;
     csg->chunks[1].length_dw += RELOC_SIZE;
     radeon_bo_ref(bo);
+    __sync_add_and_fetch(&boi->referenced_in_cs, cs->id);
     cs->relocs_total_size += boi->size;
     radeon_cs_write_dword((struct radeon_cs *)cs, 0xc0001000);
     radeon_cs_write_dword((struct radeon_cs *)cs, idx);
@@ -288,6 +356,7 @@ static int cs_gem_emit(struct radeon_cs_int *cs)
                             &csg->cs, sizeof(struct drm_radeon_cs));
     for (i = 0; i < csg->base.crelocs; i++) {
         csg->relocs_bo[i]->space_accounted = 0;
+        __sync_sub_and_fetch(&csg->relocs_bo[i]->referenced_in_cs, cs->id);
         radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
         csg->relocs_bo[i] = NULL;
     }
@@ -302,6 +371,7 @@ static int cs_gem_destroy(struct radeon_cs_int *cs)
 {
     struct cs_gem *csg = (struct cs_gem*)cs;
 
+    free_id(cs->id);
     free(csg->relocs_bo);
     free(cs->relocs);
     free(cs->packets);
@@ -317,6 +387,7 @@ static int cs_gem_erase(struct radeon_cs_int *cs)
     if (csg->relocs_bo) {
         for (i = 0; i < csg->base.crelocs; i++) {
             if (csg->relocs_bo[i]) {
+                __sync_sub_and_fetch(&csg->relocs_bo[i]->referenced_in_cs, cs->id);
                 radeon_bo_unref((struct radeon_bo *)csg->relocs_bo[i]);
                 csg->relocs_bo[i] = NULL;
             }
diff --git a/radeon/radeon_cs_int.h b/radeon/radeon_cs_int.h
index 8ba76bf..6cee574 100644
--- a/radeon/radeon_cs_int.h
+++ b/radeon/radeon_cs_int.h
@@ -28,6 +28,7 @@ struct radeon_cs_int {
     int                         bo_count;
     void                        (*space_flush_fn)(void *);
     void                        *space_flush_data;
+    uint32_t                    id;
 };
 
 /* cs functions */
-- 
1.6.3.3


------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping.
  2010-03-10 16:20 ` [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping Pauli Nieminen
@ 2010-03-10 17:18   ` Michel Dänzer
  2010-03-10 18:26     ` Pauli Nieminen
  0 siblings, 1 reply; 9+ messages in thread
From: Michel Dänzer @ 2010-03-10 17:18 UTC (permalink / raw)
  To: Pauli Nieminen; +Cc: intel-gfx, dri-devel

On Wed, 2010-03-10 at 18:20 +0200, Pauli Nieminen wrote: 
> Bit has table will be first checked from BO if we can quarentee this BO is not
> in this cs already.
> 
> To quarentee that there is no other cs with same id number of CS that can have
> id is limited to 32. Adding and remocing reference in bo is done with atomic
> operations to allow parallel access to a bo from multiple contexts.
> 
> This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
> to 2.6%.
> 
> Signed-off-by: Pauli Nieminen <suokkos@gmail.com>

[...]

> diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c
> index 45a219c..83aabea 100644
> --- a/radeon/radeon_cs_gem.c
> +++ b/radeon/radeon_cs_gem.c
> @@ -68,6 +69,66 @@ struct cs_gem {
>      struct radeon_bo_int        **relocs_bo;
>  };
>  
> +
> +#if !defined(__GNUC__) || __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
> +/* no built in sync support in compiler define place holders */
> +uint32_t __sync_add_and_fetch(uint32_t *a, uint32_t val)
> +{
> +	*a += val;
> +	return val;
> +}
> +
> +uint32_t __sync_add_and_fetch(uint32_t *a, uint32_t val)
> +{
> +	*a -= val;
> +	return val;
> +}
> +#endif

This doesn't look like it could build... presumably the latter should be
called __sync_sub_and_fetch()?

Do these stand any chance of working properly in circumstances where
atomicity is actually important though?


-- 
Earthling Michel Dänzer           |                http://www.vmware.com
Libre software enthusiast         |          Debian, X and DRI developer

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing.
  2010-03-10 16:20 [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Pauli Nieminen
  2010-03-10 16:20 ` [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping Pauli Nieminen
@ 2010-03-10 18:11 ` Matt Turner
  2010-03-10 20:44   ` [Intel-gfx] " Julien Cristau
  2010-03-12 23:36 ` Eric Anholt
  2 siblings, 1 reply; 9+ messages in thread
From: Matt Turner @ 2010-03-10 18:11 UTC (permalink / raw)
  To: Pauli Nieminen; +Cc: intel-gfx, dri-devel

On Wed, Mar 10, 2010 at 11:20 AM, Pauli Nieminen <suokkos@gmail.com> wrote:
> intel_atomic.h includes very usefull atomic operations for
> lock free parrallel access of variables. Moving these to
> core libdrm for code sharing with radeon.
>
> Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
> ---
>  Makefile.am          |    2 +-
>  configure.ac         |    2 +-
>  intel/intel_atomic.h |   56 +-----------------------------
>  xf86atomic.h         |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 96 insertions(+), 57 deletions(-)
>  create mode 100644 xf86atomic.h
>
> diff --git a/Makefile.am b/Makefile.am
> index ee3ccc7..295121f 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -59,7 +59,7 @@ libdrm_la_SOURCES =                           \
>        libdrm_lists.h
>
>  libdrmincludedir = ${includedir}
> -libdrminclude_HEADERS = xf86drm.h xf86drmMode.h
> +libdrminclude_HEADERS = xf86drm.h xf86drmMode.h xf86atomic.h
>
>  EXTRA_DIST = libdrm.pc.in include/drm/*
>
> diff --git a/configure.ac b/configure.ac
> index aaa8efa..953a758 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -198,7 +198,7 @@ if test "x$INTEL" != "xno"; then
>
>     ])
>     if test "x$drm_cv_atomic_primitives" = xIntel; then
> -           AC_DEFINE(HAVE_INTEL_ATOMIC_PRIMITIVES, 1,
> +           AC_DEFINE(HAVE_LIBDRM_ATOMIC_PRIMITIVES, 1,
>                      [Enable if your compiler supports the Intel __sync_* atomic primitives])
>     fi
>     if test "x$drm_cv_atomic_primitives" = "xlibatomic-ops"; then
> diff --git a/intel/intel_atomic.h b/intel/intel_atomic.h
> index 12bb96b..dcb4ec8 100644
> --- a/intel/intel_atomic.h
> +++ b/intel/intel_atomic.h
> @@ -34,60 +34,6 @@
>  #ifndef INTEL_ATOMICS_H
>  #define INTEL_ATOMICS_H
>
> -#ifdef HAVE_CONFIG_H
> -#include "config.h"
> -#endif
> -
> -#if HAVE_INTEL_ATOMIC_PRIMITIVES
> -
> -#define HAS_ATOMIC_OPS 1
> -
> -typedef struct {
> -       int atomic;
> -} atomic_t;
> -
> -# define atomic_read(x) ((x)->atomic)
> -# define atomic_set(x, val) ((x)->atomic = (val))
> -# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
> -# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
> -# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
> -
> -#endif
> -
> -#if HAVE_LIB_ATOMIC_OPS
> -#include <atomic_ops.h>
> -
> -#define HAS_ATOMIC_OPS 1
> -
> -typedef struct {
> -       AO_t atomic;
> -} atomic_t;
> -
> -# define atomic_read(x) AO_load_full(&(x)->atomic)
> -# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
> -# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
> -# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
> -# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv)
> -
> -#endif
> -
> -#if defined(__sun) && !defined(HAS_ATOMIC_OPS)  /* Solaris & OpenSolaris */
> -
> -#include <sys/atomic.h>
> -#define HAS_ATOMIC_OPS 1
> -
> -typedef struct { uint_t atomic; } atomic_t;
> -
> -# define atomic_read(x) (int) ((x)->atomic)
> -# define atomic_set(x, val) ((x)->atomic = (uint_t)(val))
> -# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
> -# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1)
> -# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv)
> -
> -#endif
> -
> -#if ! HAS_ATOMIC_OPS
> -#error libdrm-intel requires atomic operations, please define them for your CPU/compiler.
> -#endif
> +#include <xf86atomic.h>
>
>  #endif
> diff --git a/xf86atomic.h b/xf86atomic.h
> new file mode 100644
> index 0000000..627dcf2
> --- /dev/null
> +++ b/xf86atomic.h
> @@ -0,0 +1,93 @@
> +/*
> + * Copyright © 2009 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Chris Wilson <chris@chris-wilson.co.uk>
> + *
> + */
> +
> +/**
> + * @file intel_atomics.h

This needs to be updated.

> + *
> + * Private definitions for atomic operations
> + */
> +
> +#ifndef LIBDRM_ATOMICS_H
> +#define LIBDRM_ATOMICS_H
> +
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +
> +#if HAVE_LIBDRM_ATOMIC_PRIMITIVES
> +
> +#define HAS_ATOMIC_OPS 1
> +
> +typedef struct {
> +       int atomic;
> +} atomic_t;
> +
> +# define atomic_read(x) ((x)->atomic)
> +# define atomic_set(x, val) ((x)->atomic = (val))
> +# define atomic_inc(x) ((void) __sync_fetch_and_add (&(x)->atomic, 1))
> +# define atomic_dec_and_test(x) (__sync_fetch_and_add (&(x)->atomic, -1) == 1)
> +# define atomic_cmpxchg(x, oldv, newv) __sync_val_compare_and_swap (&(x)->atomic, oldv, newv)
> +
> +#endif
> +
> +#if HAVE_LIB_ATOMIC_OPS
> +#include <atomic_ops.h>
> +
> +#define HAS_ATOMIC_OPS 1
> +
> +typedef struct {
> +       AO_t atomic;
> +} atomic_t;
> +
> +# define atomic_read(x) AO_load_full(&(x)->atomic)
> +# define atomic_set(x, val) AO_store_full(&(x)->atomic, (val))
> +# define atomic_inc(x) ((void) AO_fetch_and_add1_full(&(x)->atomic))
> +# define atomic_dec_and_test(x) (AO_fetch_and_sub1_full(&(x)->atomic) == 1)
> +# define atomic_cmpxchg(x, oldv, newv) AO_compare_and_swap_full(&(x)->atomic, oldv, newv)
> +
> +#endif
> +
> +#if defined(__sun) && !defined(HAS_ATOMIC_OPS)  /* Solaris & OpenSolaris */
> +
> +#include <sys/atomic.h>
> +#define HAS_ATOMIC_OPS 1
> +
> +typedef struct { uint_t atomic; } atomic_t;
> +
> +# define atomic_read(x) (int) ((x)->atomic)
> +# define atomic_set(x, val) ((x)->atomic = (uint_t)(val))
> +# define atomic_inc(x) (atomic_inc_uint (&(x)->atomic))
> +# define atomic_dec_and_test(x) (atomic_dec_uint_nv(&(x)->atomic) == 1)
> +# define atomic_cmpxchg(x, oldv, newv) atomic_cas_uint (&(x)->atomic, oldv, newv)
> +
> +#endif
> +
> +#if ! HAS_ATOMIC_OPS
> +#error libdrm-intel requires atomic operations, please define them for your CPU/compiler.
> +#endif

This needs to be updated/fixed.
> +
> +#endif
> --
> 1.6.3.3

Matt

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping.
  2010-03-10 17:18   ` Michel Dänzer
@ 2010-03-10 18:26     ` Pauli Nieminen
  0 siblings, 0 replies; 9+ messages in thread
From: Pauli Nieminen @ 2010-03-10 18:26 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: intel-gfx, dri-devel

2010/3/10 Michel Dänzer <michel@daenzer.net>:
> On Wed, 2010-03-10 at 18:20 +0200, Pauli Nieminen wrote:
>> Bit has table will be first checked from BO if we can quarentee this BO is not
>> in this cs already.
>>
>> To quarentee that there is no other cs with same id number of CS that can have
>> id is limited to 32. Adding and remocing reference in bo is done with atomic
>> operations to allow parallel access to a bo from multiple contexts.
>>
>> This optimization decreases cs_write_reloc share of torcs profiling from 4.3%
>> to 2.6%.
>>
>> Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
>
> [...]
>
>> diff --git a/radeon/radeon_cs_gem.c b/radeon/radeon_cs_gem.c
>> index 45a219c..83aabea 100644
>> --- a/radeon/radeon_cs_gem.c
>> +++ b/radeon/radeon_cs_gem.c
>> @@ -68,6 +69,66 @@ struct cs_gem {
>>      struct radeon_bo_int        **relocs_bo;
>>  };
>>
>> +
>> +#if !defined(__GNUC__) || __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
>> +/* no built in sync support in compiler define place holders */
>> +uint32_t __sync_add_and_fetch(uint32_t *a, uint32_t val)
>> +{
>> +     *a += val;
>> +     return val;
>> +}
>> +
>> +uint32_t __sync_add_and_fetch(uint32_t *a, uint32_t val)
>> +{
>> +     *a -= val;
>> +     return val;
>> +}
>> +#endif
>
> This doesn't look like it could build... presumably the latter should be
> called __sync_sub_and_fetch()?
>

sorry .wrong patch coming from somewhere :/

> Do these stand any chance of working properly in circumstances where
> atomicity is actually important though?
>
>
> --
> Earthling Michel Dänzer           |                http://www.vmware.com
> Libre software enthusiast         |          Debian, X and DRI developer
>

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing.
  2010-03-10 18:11 ` [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Matt Turner
@ 2010-03-10 20:44   ` Julien Cristau
  2010-03-10 22:21     ` Pauli Nieminen
  0 siblings, 1 reply; 9+ messages in thread
From: Julien Cristau @ 2010-03-10 20:44 UTC (permalink / raw)
  To: Matt Turner; +Cc: intel-gfx, dri-devel

> On Wed, Mar 10, 2010 at 11:20 AM, Pauli Nieminen <suokkos@gmail.com> wrote:
> > intel_atomic.h includes very usefull atomic operations for
> > lock free parrallel access of variables. Moving these to
> > core libdrm for code sharing with radeon.
> >
> > Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
> > ---
> >  Makefile.am          |    2 +-
> >  configure.ac         |    2 +-
> >  intel/intel_atomic.h |   56 +-----------------------------
> >  xf86atomic.h         |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  4 files changed, 96 insertions(+), 57 deletions(-)
> >  create mode 100644 xf86atomic.h
> >
> > diff --git a/Makefile.am b/Makefile.am
> > index ee3ccc7..295121f 100644
> > --- a/Makefile.am
> > +++ b/Makefile.am
> > @@ -59,7 +59,7 @@ libdrm_la_SOURCES =                           \
> >        libdrm_lists.h
> >
> >  libdrmincludedir = ${includedir}
> > -libdrminclude_HEADERS = xf86drm.h xf86drmMode.h
> > +libdrminclude_HEADERS = xf86drm.h xf86drmMode.h xf86atomic.h
> >

Should this really get installed?  I'm not sure this should be public
libdrm interface.

Cheers,
Julien

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing.
  2010-03-10 20:44   ` [Intel-gfx] " Julien Cristau
@ 2010-03-10 22:21     ` Pauli Nieminen
  2010-03-10 22:35       ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Pauli Nieminen @ 2010-03-10 22:21 UTC (permalink / raw)
  To: Julien Cristau; +Cc: dri-devel, intel-gfx

On Wed, Mar 10, 2010 at 10:44 PM, Julien Cristau <jcristau@debian.org> wrote:
>> On Wed, Mar 10, 2010 at 11:20 AM, Pauli Nieminen <suokkos@gmail.com> wrote:
>> > intel_atomic.h includes very usefull atomic operations for
>> > lock free parrallel access of variables. Moving these to
>> > core libdrm for code sharing with radeon.
>> >
>> > Signed-off-by: Pauli Nieminen <suokkos@gmail.com>
>> > ---
>> >  Makefile.am          |    2 +-
>> >  configure.ac         |    2 +-
>> >  intel/intel_atomic.h |   56 +-----------------------------
>> >  xf86atomic.h         |   93 ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >  4 files changed, 96 insertions(+), 57 deletions(-)
>> >  create mode 100644 xf86atomic.h
>> >
>> > diff --git a/Makefile.am b/Makefile.am
>> > index ee3ccc7..295121f 100644
>> > --- a/Makefile.am
>> > +++ b/Makefile.am
>> > @@ -59,7 +59,7 @@ libdrm_la_SOURCES =                           \
>> >        libdrm_lists.h
>> >
>> >  libdrmincludedir = ${includedir}
>> > -libdrminclude_HEADERS = xf86drm.h xf86drmMode.h
>> > +libdrminclude_HEADERS = xf86drm.h xf86drmMode.h xf86atomic.h
>> >
>
> Should this really get installed?  I'm not sure this should be public
> libdrm interface.
>
> Cheers,
> Julien
>

Inlte had it installed. And my patch adds it to radeon so that mesa
will reference it indirectly tough radeon_bo_int.h. Maybe dependency
in mesa to radeon_bo_int.h is bad idea in first place.

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [Intel-gfx] [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing.
  2010-03-10 22:21     ` Pauli Nieminen
@ 2010-03-10 22:35       ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2010-03-10 22:35 UTC (permalink / raw)
  To: Pauli Nieminen, Julien Cristau; +Cc: intel-gfx, dri-devel

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 415 bytes --]

On Thu, 11 Mar 2010 00:21:13 +0200, Pauli Nieminen <suokkos@gmail.com> wrote:
> On Wed, Mar 10, 2010 at 10:44 PM, Julien Cristau <jcristau@debian.org> wrote:
> > Should this really get installed?  I'm not sure this should be public
> > libdrm interface.
> >
> > Cheers,
> > Julien
> >
> 
> Intel had it installed.

My bad, change it to noinst_HEADERS please.

-- 
Chris Wilson, Intel Open Source Technology Centre


[-- Attachment #2: Type: text/plain, Size: 345 bytes --]

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev

[-- Attachment #3: Type: text/plain, Size: 161 bytes --]

--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing.
  2010-03-10 16:20 [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Pauli Nieminen
  2010-03-10 16:20 ` [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping Pauli Nieminen
  2010-03-10 18:11 ` [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Matt Turner
@ 2010-03-12 23:36 ` Eric Anholt
  2 siblings, 0 replies; 9+ messages in thread
From: Eric Anholt @ 2010-03-12 23:36 UTC (permalink / raw)
  To: Pauli Nieminen, dri-devel; +Cc: intel-gfx


[-- Attachment #1.1: Type: text/plain, Size: 285 bytes --]

On Wed, 10 Mar 2010 18:20:42 +0200, Pauli Nieminen <suokkos@gmail.com> wrote:
> intel_atomic.h includes very usefull atomic operations for
> lock free parrallel access of variables. Moving these to
> core libdrm for code sharing with radeon.

s/xf86/libdrm/ but other than that, cool.

[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]

[-- Attachment #2: Type: text/plain, Size: 345 bytes --]

------------------------------------------------------------------------------
Download Intel&#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev

[-- Attachment #3: Type: text/plain, Size: 161 bytes --]

--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2010-03-12 23:36 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-03-10 16:20 [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Pauli Nieminen
2010-03-10 16:20 ` [PATCH 2/2] libdrm_radeon: Optimize reloc writing to do less looping Pauli Nieminen
2010-03-10 17:18   ` Michel Dänzer
2010-03-10 18:26     ` Pauli Nieminen
2010-03-10 18:11 ` [PATCH 1/2] libdrm: Move intel_atomic.h to libdrm core for sharing Matt Turner
2010-03-10 20:44   ` [Intel-gfx] " Julien Cristau
2010-03-10 22:21     ` Pauli Nieminen
2010-03-10 22:35       ` Chris Wilson
2010-03-12 23:36 ` Eric Anholt

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.