xen-devel.lists.xenproject.org archive mirror
* [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

This series implements non-contiguous trace buffers.
Please review.

For some reason it is not possible to allocate more than 128MB with repeated
calls to alloc_xenheap_page(). Any ideas on how to reach the theoretical limit
of 256MB per cpu?
Also, the error path in alloc_trace_bufs() needs a fix; I always hit the
ASSERT there.
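
To illustrate the difference between the two allocation schemes, a minimal
sketch (declarations and error handling elided; the real code is in patch 5):

    /* Old: one contiguous order-N block per cpu. This fails as soon as
     * no run of 2^order free pages exists, even when enough memory is
     * free overall. */
    rawbuf = alloc_xenheap_pages(get_order_from_pages(pages),
                                 MEMF_bits(32 + PAGE_SHIFT));

    /* New: 'pages' independent order-0 allocations per cpu, linked
     * through a per-cpu mfn list in t_info, so only single free pages
     * are needed. */
    for ( i = 0; i < pages; i++ )
        page_va[i] = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));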

Olaf


* [PATCH 1 of 5] Move the global variable t_info_first_offset into calculate_tbuf_size()
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID a19b5f66ce46efd6f8f697583f9bdbc2b567fdbd
# Parent  39f2942fe56bda90d3285b9f2d4e214f0712375f
Move the global variable t_info_first_offset into calculate_tbuf_size()
because it is only used there. Change the type from u32 to uint32_t to
match the type used in other places.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 39f2942fe56b -r a19b5f66ce46 xen/common/trace.c
--- a/xen/common/trace.c	Wed May 04 14:46:32 2011 +0100
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -55,7 +55,6 @@ static DEFINE_PER_CPU_READ_MOSTLY(struct
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static u32 data_size;
-static u32 t_info_first_offset __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
@@ -94,10 +93,10 @@ static struct notifier_block cpu_nfb = {
     .notifier_call = cpu_callback
 };
 
-static void calc_tinfo_first_offset(void)
+static uint32_t calc_tinfo_first_offset(void)
 {
     int offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
-    t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+    return fit_to_type(uint32_t, offset_in_bytes);
 }
 
 /**
@@ -107,7 +106,7 @@ static void calc_tinfo_first_offset(void
  * The t_info layout is fixed and cant be changed without breaking xentrace.
  * Initialize t_info_pages based on number of trace pages.
  */
-static int calculate_tbuf_size(unsigned int pages)
+static int calculate_tbuf_size(unsigned int pages, uint32_t t_info_first_offset)
 {
     struct t_buf dummy_size;
     typeof(dummy_size.prod) max_size;
@@ -156,6 +155,7 @@ static int alloc_trace_bufs(unsigned int
     int i, cpu, order;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list;
+    uint32_t t_info_first_offset;
     int offset;
 
     if ( t_info )
@@ -165,9 +165,9 @@ static int alloc_trace_bufs(unsigned int
         return -EINVAL;
 
     /* Calculate offset in u32 of first mfn */
-    calc_tinfo_first_offset();
+    t_info_first_offset = calc_tinfo_first_offset();
 
-    pages = calculate_tbuf_size(pages);
+    pages = calculate_tbuf_size(pages, t_info_first_offset);
     order = get_order_from_pages(pages);
 
     t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
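
For reference, the offset being computed here is where the per-cpu mfn lists
start inside the t_info area. A standalone sketch of the calculation (the
struct below is an approximation; the authoritative definition is struct
t_info in the public trace header):

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    #define NR_CPUS 256                /* illustrative value */

    struct t_info {
        uint16_t tbuf_size;            /* trace pages per cpu */
        uint16_t mfn_offset[];         /* per-cpu list offsets, in u32 units */
    };

    /* Mirrors calc_tinfo_first_offset(): round the header up to whole
     * uint32_t slots (presumably what fit_to_type() does), so cpu 0's
     * mfn list starts on a u32 boundary. */
    static uint32_t calc_first_offset(void)
    {
        size_t bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
        return (bytes + sizeof(uint32_t) - 1) / sizeof(uint32_t);
    }

    int main(void)
    {
        printf("first mfn slot: u32 index %u\n",
               (unsigned)calc_first_offset());
        return 0;
    }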


* [PATCH 2 of 5] Mark data_size __read_mostly because it is only written once
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID 91c40bb4c01a331a41ab6a14a2b5ec7d12e86a76
# Parent  a19b5f66ce46efd6f8f697583f9bdbc2b567fdbd
Mark data_size __read_mostly because it is only written once.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r a19b5f66ce46 -r 91c40bb4c01a xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -54,7 +54,7 @@ static unsigned int t_info_pages;
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static u32 data_size;
+static u32 data_size __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
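
For reference, __read_mostly simply places the variable in a dedicated linker
section so that rarely-written data does not share cache lines with
frequently-written data. Roughly (an approximation; see Xen's headers for the
authoritative definition):

    #define __read_mostly __attribute__((__section__(".data.read_mostly")))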


* [PATCH 3 of 5] Remove unneeded cast when assigning pointer value to dst
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304697395 -7200
# Node ID 1631b61acaa8e88437d0f1861409ab1824de2721
# Parent  91c40bb4c01a331a41ab6a14a2b5ec7d12e86a76
Remove the unneeded cast when assigning the pointer value to dst.
Both arrays are uint32_t, and memcpy() takes a void pointer.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 91c40bb4c01a -r 1631b61acaa8 xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
@@ -483,7 +483,7 @@ static inline void __insert_record(struc
                                    const void *extra_data)
 {
     struct t_rec *rec;
-    unsigned char *dst;
+    uint32_t *dst;
     unsigned int extra_word = extra / sizeof(u32);
     unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
@@ -508,13 +508,13 @@ static inline void __insert_record(struc
 
     rec->event = event;
     rec->extra_u32 = extra_word;
-    dst = (unsigned char *)rec->u.nocycles.extra_u32;
+    dst = rec->u.nocycles.extra_u32;
     if ( (rec->cycles_included = cycles) != 0 )
     {
         u64 tsc = (u64)get_cycles();
         rec->u.cycles.cycles_lo = (uint32_t)tsc;
         rec->u.cycles.cycles_hi = (uint32_t)(tsc >> 32);
-        dst = (unsigned char *)rec->u.cycles.extra_u32;
+        dst = rec->u.cycles.extra_u32;
     } 
 
     if ( extra_data && extra )
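
For reference, a standalone illustration of why neither cast is needed
(hypothetical struct, not the Xen one):

    #include <stdint.h>
    #include <string.h>

    struct rec { uint32_t extra_u32[7]; };

    static void copy_extra(struct rec *r, const void *src, size_t n)
    {
        uint32_t *dst = r->extra_u32;  /* array decays to uint32_t * */
        memcpy(dst, src, n);           /* conversion to void * is implicit */
    }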


* [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304700881 -7200
# Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
# Parent  1631b61acaa8e88437d0f1861409ab1824de2721
Update __insert_record() to copy the trace record to individual mfns.
This is a prereq before changing the per-cpu allocation from contiguous
to non-contiguous allocation.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
@@ -52,7 +52,6 @@ static struct t_info *t_info;
 static unsigned int t_info_pages;
 
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
-static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static u32 data_size __read_mostly;
 
@@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int
 
         per_cpu(t_bufs, cpu) = buf = rawbuf;
         buf->cons = buf->prod = 0;
-        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
     }
 
     offset = t_info_first_offset;
@@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const
     return data_size - calc_unconsumed_bytes(buf);
 }
 
-static inline struct t_rec *next_record(const struct t_buf *buf,
-                                        uint32_t *next)
+static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
+                                 unsigned char **next_page,
+                                 uint32_t *offset_in_page)
 {
     u32 x = buf->prod, cons = buf->cons;
+    uint32_t per_cpu_mfn_offset;
+    uint32_t per_cpu_mfn_nr;
+    uint32_t *mfn_list;
+    uint32_t mfn;
+    unsigned char *this_page;
 
     barrier(); /* must read buf->prod and buf->cons only once */
     *next = x;
@@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
 
     ASSERT(x < data_size);
 
-    return (struct t_rec *)&this_cpu(t_data)[x];
+    /* add leading header to get total offset of next record */
+    x += sizeof(struct t_buf);
+    *offset_in_page = x % PAGE_SIZE;
+
+    /* offset into array of mfns */
+    per_cpu_mfn_nr = x / PAGE_SIZE;
+    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
+    mfn_list = (uint32_t *)t_info;
+    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
+    this_page = mfn_to_virt(mfn);
+    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
+    {
+        /* reached end of buffer? */
+        *next_page = NULL;
+    }
+    else
+    {
+        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
+        *next_page = mfn_to_virt(mfn);
+    }
+    return this_page;
 }
 
 static inline void __insert_record(struct t_buf *buf,
@@ -482,28 +506,37 @@ static inline void __insert_record(struc
                                    unsigned int rec_size,
                                    const void *extra_data)
 {
-    struct t_rec *rec;
+    struct t_rec split_rec, *rec;
     uint32_t *dst;
+    unsigned char *this_page, *next_page;
     unsigned int extra_word = extra / sizeof(u32);
     unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
+    uint32_t offset;
+    uint32_t remaining;
 
     BUG_ON(local_rec_size != rec_size);
     BUG_ON(extra & 3);
 
-    rec = next_record(buf, &next);
-    if ( !rec )
+    this_page = next_record(buf, &next, &next_page, &offset);
+    if ( !this_page )
         return;
-    /* Double-check once more that we have enough space.
-     * Don't bugcheck here, in case the userland tool is doing
-     * something stupid. */
-    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
+
+    remaining = PAGE_SIZE - offset;
+
+    if ( unlikely(rec_size > remaining) )
     {
-        if ( printk_ratelimit() )
+        if ( next_page == NULL )
+        {
+            /* access beyond end of buffer */
             printk(XENLOG_WARNING
-                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
-                   __func__, data_size, next, buf->cons, rec_size);
-        return;
+                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
+                   __func__, data_size, next, buf->cons, rec_size, remaining);
+            return;
+        }
+        rec = &split_rec;
+    } else {
+        rec = (struct t_rec*)(this_page + offset);
     }
 
     rec->event = event;
@@ -520,6 +553,12 @@ static inline void __insert_record(struc
     if ( extra_data && extra )
         memcpy(dst, extra_data, extra);
 
+    if ( unlikely(rec_size > remaining) )
+    {
+        memcpy(this_page + offset, rec, remaining);
+        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
+    }
+
     wmb();
 
     next += rec_size;
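
For reference, the core of the change is the split write: a record that would
straddle a page boundary is staged in a local struct and then copied in two
halves. A standalone sketch of the technique (simplified; not the Xen code
itself):

    #include <stdint.h>
    #include <string.h>

    #define PAGE_SIZE 4096u

    /* Copy rec_size bytes to 'offset' within this_page, spilling the
     * tail into next_page when the record straddles the boundary.
     * Returns 0 when the record would run past the end of the buffer. */
    static int split_write(unsigned char *this_page,
                           unsigned char *next_page, uint32_t offset,
                           const void *rec, uint32_t rec_size)
    {
        uint32_t remaining = PAGE_SIZE - offset;

        if ( rec_size <= remaining )
            memcpy(this_page + offset, rec, rec_size);
        else if ( next_page != NULL )
        {
            memcpy(this_page + offset, rec, remaining);
            memcpy(next_page, (const char *)rec + remaining,
                   rec_size - remaining);
        }
        else
            return 0; /* no next page: drop the record */

        return 1;
    }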


* [PATCH 5 of 5] Allocate non-contiguous per-cpu trace buffers
From: Olaf Hering @ 2011-05-06 18:25 UTC
  To: xen-devel; +Cc: George Dunlap

# HG changeset patch
# User Olaf Hering <olaf@aepfle.de>
# Date 1304706230 -7200
# Node ID bcd0b17bf8a3ab08760b8dcc1ca276defab1ed71
# Parent  1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
Allocate non-contiguous per-cpu trace buffers.

Signed-off-by: Olaf Hering <olaf@aepfle.de>

diff -r 1c5da4d9e33c -r bcd0b17bf8a3 xen/common/trace.c
--- a/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
+++ b/xen/common/trace.c	Fri May 06 20:23:50 2011 +0200
@@ -151,7 +151,7 @@ static int calculate_tbuf_size(unsigned 
  */
 static int alloc_trace_bufs(unsigned int pages)
 {
-    int i, cpu, order;
+    int i, cpu;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list;
     uint32_t t_info_first_offset;
@@ -167,32 +167,10 @@ static int alloc_trace_bufs(unsigned int
     t_info_first_offset = calc_tinfo_first_offset();
 
     pages = calculate_tbuf_size(pages, t_info_first_offset);
-    order = get_order_from_pages(pages);
 
     t_info = alloc_xenheap_pages(get_order_from_pages(t_info_pages), 0);
     if ( t_info == NULL )
-        goto out_dealloc;
-
-    /*
-     * First, allocate buffers for all of the cpus.  If any
-     * fails, deallocate what you have so far and exit. 
-     */
-    for_each_online_cpu(cpu)
-    {
-        void *rawbuf;
-        struct t_buf *buf;
-
-        if ( (rawbuf = alloc_xenheap_pages(
-                order, MEMF_bits(32 + PAGE_SHIFT))) == NULL )
-        {
-            printk(XENLOG_INFO "xentrace: memory allocation failed "
-                   "on cpu %d\n", cpu);
-            goto out_dealloc;
-        }
-
-        per_cpu(t_bufs, cpu) = buf = rawbuf;
-        buf->cons = buf->prod = 0;
-    }
+        goto out_dealloc_t_info;
 
     offset = t_info_first_offset;
     t_info_mfn_list = (uint32_t *)t_info;
@@ -204,27 +182,50 @@ static int alloc_trace_bufs(unsigned int
     t_info->tbuf_size = pages;
 
     /*
-     * Now share the pages so xentrace can map them, and write them in
-     * the global t_info structure.
+     * Allocate buffers for all of the cpus.
+     * If any fails, deallocate what you have so far and exit.
      */
     for_each_online_cpu(cpu)
     {
-        void *rawbuf = per_cpu(t_bufs, cpu);
-        struct page_info *p = virt_to_page(rawbuf);
-        uint32_t mfn = virt_to_mfn(rawbuf);
-
-        for ( i = 0; i < pages; i++ )
-        {
-            share_xen_page_with_privileged_guests(p + i, XENSHARE_writable);
-
-            t_info_mfn_list[offset + i]=mfn + i;
-        }
-        t_info->mfn_offset[cpu]=offset;
-        printk(XENLOG_INFO "xentrace: p%d mfn %"PRIx32" offset %d\n",
-               cpu, mfn, offset);
-        offset+=i;
+        void *p;
+        struct t_buf *buf;
+        struct page_info *pg;
 
         spin_lock_init(&per_cpu(t_lock, cpu));
+        /* first allocate the first page, it contains the per-cpu metadata */
+        p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+        if ( !p )
+        {
+            printk(XENLOG_INFO "xentrace: memory allocation failed "
+                   "on cpu %d after %d pages\n", cpu, 0);
+            goto out_dealloc;
+        }
+        per_cpu(t_bufs, cpu) = buf = p;
+        buf->cons = buf->prod = 0;
+
+        t_info->mfn_offset[cpu] = offset;
+        t_info_mfn_list[offset] = virt_to_mfn(p);
+        pg = virt_to_page(p);
+        share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+
+        printk(XENLOG_INFO "xentrace: p%d mfn %lx offset %d\n",
+               cpu, virt_to_mfn(p), offset);
+
+        /* now the remaining trace pages */
+        offset++;
+        for ( i = 1; i < pages; i++ )
+        {
+            p = alloc_xenheap_pages(0, MEMF_bits(32 + PAGE_SHIFT));
+            if ( !p )
+            {
+                printk(XENLOG_INFO "xentrace: memory allocation failed "
+                       "on cpu %d after %d pages\n", cpu, i);
+                goto out_dealloc;
+            }
+            t_info_mfn_list[offset++] = virt_to_mfn(p);
+            pg = virt_to_page(p);
+            share_xen_page_with_privileged_guests(pg, XENSHARE_writable);
+        }
     }
 
     data_size  = (pages * PAGE_SIZE - sizeof(struct t_buf));
@@ -240,14 +241,18 @@ static int alloc_trace_bufs(unsigned int
 out_dealloc:
     for_each_online_cpu(cpu)
     {
-        void *rawbuf = per_cpu(t_bufs, cpu);
         per_cpu(t_bufs, cpu) = NULL;
-        if ( rawbuf )
+        offset = t_info->mfn_offset[cpu];
+        for ( i = 0; i < pages; i++ )
         {
-            ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
-            free_xenheap_pages(rawbuf, order);
+            uint32_t mfn = t_info_mfn_list[offset + i];
+            if ( !mfn )
+                break;
+            ASSERT(!(mfn_to_page(mfn)->count_info & PGC_allocated));
+            free_xenheap_pages(mfn_to_virt(mfn), 0);
         }
     }
+out_dealloc_t_info:
     free_xenheap_pages(t_info, get_order_from_pages(t_info_pages));
     t_info = NULL;
     printk(XENLOG_WARNING "xentrace: allocation failed! Tracing disabled.\n");


* Re: [PATCH 0 of 5] xentrace: non-contiguous allocation of per-cpu buffer
From: Olaf Hering @ 2011-05-08 15:07 UTC
  To: xen-devel; +Cc: George Dunlap

On Fri, May 06, Olaf Hering wrote:

> For some reason it is not possible to allocate more than 128MB with repeated
> calls to alloc_xenheap_page(). Any ideas on how to reach the theoretical limit
> of 256MB per cpu?

Dom0 needs to be ballooned down to make room for the trace buffers (e.g.
with xm mem-set, or by booting with a smaller dom0_mem=).

> Also, the error path in alloc_trace_bufs() needs a fix; I always hit the
> ASSERT there.

Besides this issue, the checks in the bogus() function trigger with a
256MB per-cpu buffer.

I will revisit the series and post a new version once I have fixes for
these issues.

Olaf


* Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
From: Keir Fraser @ 2011-05-09  9:03 UTC
  To: Olaf Hering, xen-devel; +Cc: George Dunlap

On 06/05/2011 19:25, "Olaf Hering" <olaf@aepfle.de> wrote:

> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1304700881 -7200
> # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
> # Parent  1631b61acaa8e88437d0f1861409ab1824de2721
> Update __insert_record() to copy the trace record to individual mfns.
> This is a prereq before changing the per-cpu allocation from contiguous
> to non-contiguous allocation.

I applied the trivial patches 1-3. I'll wait for Acks from George for
patches 4-5.

 -- Keir

> Signed-off-by: Olaf Hering <olaf@aepfle.de>
> 
> diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c
> --- a/xen/common/trace.c Fri May 06 17:56:35 2011 +0200
> +++ b/xen/common/trace.c Fri May 06 18:54:41 2011 +0200
> @@ -52,7 +52,6 @@ static struct t_info *t_info;
>  static unsigned int t_info_pages;
>  
>  static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
> -static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
>  static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
>  static u32 data_size __read_mostly;
>  
> @@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int
>  
>          per_cpu(t_bufs, cpu) = buf = rawbuf;
>          buf->cons = buf->prod = 0;
> -        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
>      }
>  
>      offset = t_info_first_offset;
> @@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const
>      return data_size - calc_unconsumed_bytes(buf);
>  }
>  
> -static inline struct t_rec *next_record(const struct t_buf *buf,
> -                                        uint32_t *next)
> +static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
> +                                 unsigned char **next_page,
> +                                 uint32_t *offset_in_page)
>  {
>      u32 x = buf->prod, cons = buf->cons;
> +    uint32_t per_cpu_mfn_offset;
> +    uint32_t per_cpu_mfn_nr;
> +    uint32_t *mfn_list;
> +    uint32_t mfn;
> +    unsigned char *this_page;
>  
>      barrier(); /* must read buf->prod and buf->cons only once */
>      *next = x;
> @@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
>  
>      ASSERT(x < data_size);
>  
> -    return (struct t_rec *)&this_cpu(t_data)[x];
> +    /* add leading header to get total offset of next record */
> +    x += sizeof(struct t_buf);
> +    *offset_in_page = x % PAGE_SIZE;
> +
> +    /* offset into array of mfns */
> +    per_cpu_mfn_nr = x / PAGE_SIZE;
> +    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
> +    mfn_list = (uint32_t *)t_info;
> +    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
> +    this_page = mfn_to_virt(mfn);
> +    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
> +    {
> +        /* reached end of buffer? */
> +        *next_page = NULL;
> +    }
> +    else
> +    {
> +        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
> +        *next_page = mfn_to_virt(mfn);
> +    }
> +    return this_page;
>  }
>  
>  static inline void __insert_record(struct t_buf *buf,
> @@ -482,28 +506,37 @@ static inline void __insert_record(struc
>                                     unsigned int rec_size,
>                                     const void *extra_data)
>  {
> -    struct t_rec *rec;
> +    struct t_rec split_rec, *rec;
>      uint32_t *dst;
> +    unsigned char *this_page, *next_page;
>      unsigned int extra_word = extra / sizeof(u32);
>      unsigned int local_rec_size = calc_rec_size(cycles, extra);
>      uint32_t next;
> +    uint32_t offset;
> +    uint32_t remaining;
>  
>      BUG_ON(local_rec_size != rec_size);
>      BUG_ON(extra & 3);
>  
> -    rec = next_record(buf, &next);
> -    if ( !rec )
> +    this_page = next_record(buf, &next, &next_page, &offset);
> +    if ( !this_page )
>          return;
> -    /* Double-check once more that we have enough space.
> -     * Don't bugcheck here, in case the userland tool is doing
> -     * something stupid. */
> -    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
> +
> +    remaining = PAGE_SIZE - offset;
> +
> +    if ( unlikely(rec_size > remaining) )
>      {
> -        if ( printk_ratelimit() )
> +        if ( next_page == NULL )
> +        {
> +            /* access beyond end of buffer */
>              printk(XENLOG_WARNING
> -                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
> -                   __func__, data_size, next, buf->cons, rec_size);
> -        return;
> +                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
> +                   __func__, data_size, next, buf->cons, rec_size, remaining);
> +            return;
> +        }
> +        rec = &split_rec;
> +    } else {
> +        rec = (struct t_rec*)(this_page + offset);
>      }
>  
>      rec->event = event;
> @@ -520,6 +553,12 @@ static inline void __insert_record(struc
>      if ( extra_data && extra )
>          memcpy(dst, extra_data, extra);
>  
> +    if ( unlikely(rec_size > remaining) )
> +    {
> +        memcpy(this_page + offset, rec, remaining);
> +        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
> +    }
> +
>      wmb();
>  
>      next += rec_size;
> 


* Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
From: Olaf Hering @ 2011-05-09  9:31 UTC
  To: Keir Fraser; +Cc: xen-devel, George Dunlap

On Mon, May 09, Keir Fraser wrote:

> On 06/05/2011 19:25, "Olaf Hering" <olaf@aepfle.de> wrote:
> 
> > # HG changeset patch
> > # User Olaf Hering <olaf@aepfle.de>
> > # Date 1304700881 -7200
> > # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
> > # Parent  1631b61acaa8e88437d0f1861409ab1824de2721
> > Update __insert_record() to copy the trace record to individual mfns.
> > This is a prereq before changing the per-cpu allocation from contiguous
> > to non-contiguous allocation.
> 
> I applied the trivial patches 1-3. I'll wait for Acks from George for
> patches 4-5.

Thanks Keir.

There are still issues with large buffers, I will post a new series.

Olaf


* Re: [PATCH 4 of 5] Update __insert_record() to copy the trace record to individual mfns
From: George Dunlap @ 2011-05-09 11:24 UTC
  To: Olaf Hering; +Cc: George Dunlap, xen-devel@lists.xensource.com

On Fri, 2011-05-06 at 19:25 +0100, Olaf Hering wrote:
> # HG changeset patch
> # User Olaf Hering <olaf@aepfle.de>
> # Date 1304700881 -7200
> # Node ID 1c5da4d9e33c821b9e3276d7aefe7ee16ce7b162
> # Parent  1631b61acaa8e88437d0f1861409ab1824de2721
> Update __insert_record() to copy the trace record to individual mfns.
> This is a prereq before changing the per-cpu allocation from contiguous
> to non-contiguous allocation.
> 
> Signed-off-by: Olaf Hering <olaf@aepfle.de>
> 
> diff -r 1631b61acaa8 -r 1c5da4d9e33c xen/common/trace.c
> --- a/xen/common/trace.c	Fri May 06 17:56:35 2011 +0200
> +++ b/xen/common/trace.c	Fri May 06 18:54:41 2011 +0200
> @@ -52,7 +52,6 @@ static struct t_info *t_info;
>  static unsigned int t_info_pages;
>  
>  static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
> -static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
>  static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
>  static u32 data_size __read_mostly;
>  
> @@ -193,7 +192,6 @@ static int alloc_trace_bufs(unsigned int
>  
>          per_cpu(t_bufs, cpu) = buf = rawbuf;
>          buf->cons = buf->prod = 0;
> -        per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
>      }
>  
>      offset = t_info_first_offset;
> @@ -457,10 +455,16 @@ static inline u32 calc_bytes_avail(const
>      return data_size - calc_unconsumed_bytes(buf);
>  }
>  
> -static inline struct t_rec *next_record(const struct t_buf *buf,
> -                                        uint32_t *next)
> +static unsigned char *next_record(const struct t_buf *buf, uint32_t *next,
> +                                 unsigned char **next_page,
> +                                 uint32_t *offset_in_page)
>  {
>      u32 x = buf->prod, cons = buf->cons;
> +    uint32_t per_cpu_mfn_offset;
> +    uint32_t per_cpu_mfn_nr;
> +    uint32_t *mfn_list;
> +    uint32_t mfn;
> +    unsigned char *this_page;
>  
>      barrier(); /* must read buf->prod and buf->cons only once */
>      *next = x;
> @@ -472,7 +476,27 @@ static inline struct t_rec *next_record(
>  
>      ASSERT(x < data_size);
>  
> -    return (struct t_rec *)&this_cpu(t_data)[x];
> +    /* add leading header to get total offset of next record */
> +    x += sizeof(struct t_buf);
> +    *offset_in_page = x % PAGE_SIZE;
> +
> +    /* offset into array of mfns */
> +    per_cpu_mfn_nr = x / PAGE_SIZE;
> +    per_cpu_mfn_offset = t_info->mfn_offset[smp_processor_id()];
> +    mfn_list = (uint32_t *)t_info;
> +    mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr];
> +    this_page = mfn_to_virt(mfn);
> +    if (per_cpu_mfn_nr + 1 >= opt_tbuf_size)
> +    {
> +        /* reached end of buffer? */
> +        *next_page = NULL;
> +    }
> +    else
> +    {
> +        mfn = mfn_list[per_cpu_mfn_offset + per_cpu_mfn_nr + 1];
> +        *next_page = mfn_to_virt(mfn);
> +    }
> +    return this_page;
>  }

General approach here looks good, but I'm wondering if there's a way to
reduce the math here.  The amount of work done for each trace record
posted is really getting pretty big.

I guess first of all the '%' and '/' should be '& (PAGE_SIZE-1)' and
'>> PAGE_SHIFT', respectively.

Would it make sense to pre-cache the virtual address of the various MFNs
(i.e., keep a per-cpu virtual address list) rather than doing the
calculation each time? That might reduce the number of instructions needed
to find the appropriate virtual address.
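
Something along these lines, perhaps (illustrative only, untested):

    /* Strength-reduce the arithmetic and cache each page's virtual
     * address per cpu at allocation time, so the hot path avoids the
     * mfn_offset -> mfn_list -> mfn_to_virt() walk entirely. */
    static DEFINE_PER_CPU_READ_MOSTLY(unsigned char **, t_page_va);

    static unsigned char *record_page(uint32_t x, uint32_t *offset_in_page)
    {
        unsigned char **va = this_cpu(t_page_va); /* filled at alloc time */

        *offset_in_page = x & (PAGE_SIZE - 1);    /* was: x % PAGE_SIZE */
        return va[x >> PAGE_SHIFT];               /* was: mfn_to_virt(mfn) */
    }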

 -George

>  
>  static inline void __insert_record(struct t_buf *buf,
> @@ -482,28 +506,37 @@ static inline void __insert_record(struc
>                                     unsigned int rec_size,
>                                     const void *extra_data)
>  {
> -    struct t_rec *rec;
> +    struct t_rec split_rec, *rec;
>      uint32_t *dst;
> +    unsigned char *this_page, *next_page;
>      unsigned int extra_word = extra / sizeof(u32);
>      unsigned int local_rec_size = calc_rec_size(cycles, extra);
>      uint32_t next;
> +    uint32_t offset;
> +    uint32_t remaining;
>  
>      BUG_ON(local_rec_size != rec_size);
>      BUG_ON(extra & 3);
>  
> -    rec = next_record(buf, &next);
> -    if ( !rec )
> +    this_page = next_record(buf, &next, &next_page, &offset);
> +    if ( !this_page )
>          return;
> -    /* Double-check once more that we have enough space.
> -     * Don't bugcheck here, in case the userland tool is doing
> -     * something stupid. */
> -    if ( (unsigned char *)rec + rec_size > this_cpu(t_data) + data_size )
> +
> +    remaining = PAGE_SIZE - offset;
> +
> +    if ( unlikely(rec_size > remaining) )
>      {
> -        if ( printk_ratelimit() )
> +        if ( next_page == NULL )
> +        {
> +            /* access beyond end of buffer */
>              printk(XENLOG_WARNING
> -                   "%s: size=%08x prod=%08x cons=%08x rec=%u\n",
> -                   __func__, data_size, next, buf->cons, rec_size);
> -        return;
> +                   "%s: size=%08x prod=%08x cons=%08x rec=%u remaining=%u\n",
> +                   __func__, data_size, next, buf->cons, rec_size, remaining);
> +            return;
> +        }
> +        rec = &split_rec;
> +    } else {
> +        rec = (struct t_rec*)(this_page + offset);
>      }
>  
>      rec->event = event;
> @@ -520,6 +553,12 @@ static inline void __insert_record(struc
>      if ( extra_data && extra )
>          memcpy(dst, extra_data, extra);
>  
> +    if ( unlikely(rec_size > remaining) )
> +    {
> +        memcpy(this_page + offset, rec, remaining);
> +        memcpy(next_page, (char *)rec + remaining, rec_size - remaining);
> +    }
> +
>      wmb();
>  
>      next += rec_size;

