All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0 of 4] trace: Miscellaneous fixes
@ 2010-07-01 10:12 George Dunlap
  2010-07-01 10:12 ` [PATCH 1 of 4] trace: Fix T_INFO_FIRST_OFFSET calculation George Dunlap
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: George Dunlap @ 2010-07-01 10:12 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

Thanks to Jan Beulich for identifying these and coming up with initial
fixes.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1 of 4] trace: Fix T_INFO_FIRST_OFFSET calculation
  2010-07-01 10:12 [PATCH 0 of 4] trace: Miscellaneous fixes George Dunlap
@ 2010-07-01 10:12 ` George Dunlap
  2010-07-01 10:12 ` [PATCH 2 of 4] trace: improve check_tbuf_size() George Dunlap
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: George Dunlap @ 2010-07-01 10:12 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

This wasn't defined correctly, thus allowing in the
num_online_cpus() == NR_CPUS case to pass a corrupted MFN to
Dom0.

Reported-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r aecf092da748 -r cdffdb3b34b1 xen/common/trace.c
--- a/xen/common/trace.c	Wed Jun 30 22:12:54 2010 +0100
+++ b/xen/common/trace.c	Thu Jul 01 11:00:58 2010 +0100
@@ -50,12 +50,11 @@
 static struct t_info *t_info;
 #define T_INFO_PAGES 2  /* Size fixed at 2 pages for now. */
 #define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
-/* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */
-#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t))
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
 static int data_size;
+static u32 t_info_first_offset __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
@@ -75,13 +74,29 @@
 /* which tracing events are enabled */
 static u32 tb_event_mask = TRC_ALL;
 
+/* Return the number of elements _type necessary to store at least _x bytes of data
+ * i.e., sizeof(_type) * ans >= _x. */
+#define fit_to_type(_type, _x) (((_x)+sizeof(_type)-1) / sizeof(_type))
+
+static void calc_tinfo_first_offset(void)
+{
+    int offset_in_bytes;
+    
+    offset_in_bytes = offsetof(struct t_info, mfn_offset[NR_CPUS]);
+
+    t_info_first_offset = fit_to_type(uint32_t, offset_in_bytes);
+
+    gdprintk(XENLOG_INFO, "%s: NR_CPUs %d, offset_in_bytes %d, t_info_first_offset %u\n",
+           __func__, NR_CPUS, offset_in_bytes, (unsigned)t_info_first_offset);
+}
+
 /**
  * check_tbuf_size - check to make sure that the proposed size will fit
  * in the currently sized struct t_info.
  */
 static inline int check_tbuf_size(int size)
 {
-    return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t));
+    return (num_online_cpus() * size + t_info_first_offset) > (T_INFO_SIZE / sizeof(uint32_t));
 }
 
 /**
@@ -100,7 +115,7 @@
     unsigned long nr_pages;
     /* Start after a fixed-size array of NR_CPUS */
     uint32_t *t_info_mfn_list = (uint32_t *)t_info;
-    int offset = T_INFO_FIRST_OFFSET;
+    int offset = t_info_first_offset;
 
     BUG_ON(check_tbuf_size(opt_tbuf_size));
 
@@ -293,6 +308,10 @@
 void __init init_trace_bufs(void)
 {
     int i;
+
+    /* Calculate offset in u32 of first mfn */
+    calc_tinfo_first_offset();
+
     /* t_info size fixed at 2 pages for now.  That should be big enough / small enough
      * until it's worth making it dynamic. */
     t_info = alloc_xenheap_pages(1, 0);

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2 of 4] trace: improve check_tbuf_size()
  2010-07-01 10:12 [PATCH 0 of 4] trace: Miscellaneous fixes George Dunlap
  2010-07-01 10:12 ` [PATCH 1 of 4] trace: Fix T_INFO_FIRST_OFFSET calculation George Dunlap
@ 2010-07-01 10:12 ` George Dunlap
  2010-07-01 10:12 ` [PATCH 3 of 4] trace: adjust printk()s George Dunlap
  2010-07-01 10:12 ` [PATCH 4 of 4] trace: fix security issues George Dunlap
  3 siblings, 0 replies; 5+ messages in thread
From: George Dunlap @ 2010-07-01 10:12 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

It didn't consider the case of the incoming size not allowing for the
2*data_size range for t_buf->{prod,cons}

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r cdffdb3b34b1 -r aef5229e5b96 xen/common/trace.c
--- a/xen/common/trace.c	Thu Jul 01 11:00:58 2010 +0100
+++ b/xen/common/trace.c	Thu Jul 01 11:09:40 2010 +0100
@@ -92,11 +92,19 @@
 
 /**
  * check_tbuf_size - check to make sure that the proposed size will fit
- * in the currently sized struct t_info.
+ * in the currently sized struct t_info and allows prod and cons to
+ * reach double the value without overflow.
  */
-static inline int check_tbuf_size(int size)
+static int check_tbuf_size(u32 pages)
 {
-    return (num_online_cpus() * size + t_info_first_offset) > (T_INFO_SIZE / sizeof(uint32_t));
+    struct t_buf dummy;
+    typeof(dummy.prod) size;
+    
+    size = ((typeof(dummy.prod))pages)  * PAGE_SIZE;
+    
+    return (size / PAGE_SIZE != pages)
+           || (size + size < size)
+           || (num_online_cpus() * pages + t_info_first_offset > T_INFO_SIZE / sizeof(uint32_t));
 }
 
 /**

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 3 of 4] trace: adjust printk()s
  2010-07-01 10:12 [PATCH 0 of 4] trace: Miscellaneous fixes George Dunlap
  2010-07-01 10:12 ` [PATCH 1 of 4] trace: Fix T_INFO_FIRST_OFFSET calculation George Dunlap
  2010-07-01 10:12 ` [PATCH 2 of 4] trace: improve check_tbuf_size() George Dunlap
@ 2010-07-01 10:12 ` George Dunlap
  2010-07-01 10:12 ` [PATCH 4 of 4] trace: fix security issues George Dunlap
  3 siblings, 0 replies; 5+ messages in thread
From: George Dunlap @ 2010-07-01 10:12 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

They should be lower level or rate limited.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Acked-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r aef5229e5b96 -r 16ccc9071662 xen/common/trace.c
--- a/xen/common/trace.c	Thu Jul 01 11:09:40 2010 +0100
+++ b/xen/common/trace.c	Thu Jul 01 11:09:49 2010 +0100
@@ -138,7 +138,7 @@
     }
 
     t_info->tbuf_size = opt_tbuf_size;
-    printk("tbuf_size %d\n", t_info->tbuf_size);
+    printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
 
     nr_pages = opt_tbuf_size;
     order = get_order_from_pages(nr_pages);
@@ -195,7 +195,7 @@
             /* Write list first, then write per-cpu offset. */
             wmb();
             t_info->mfn_offset[cpu]=offset;
-            printk("p%d mfn %"PRIx32" offset %d\n",
+            printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
                    cpu, mfn, offset);
             offset+=i;
         }
@@ -503,12 +503,13 @@
     /* Double-check once more that we have enough space.
      * Don't bugcheck here, in case the userland tool is doing
      * something stupid. */
-    if ( calc_bytes_avail(buf) < rec_size )
+    next = calc_bytes_avail(buf);
+    if ( next < rec_size )
     {
-        printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
-               __func__,
-               calc_bytes_avail(buf),
-               data_size, buf->prod, buf->cons, data_size, rec_size);
+        if ( printk_ratelimit() )
+            printk(XENLOG_WARNING
+                   "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n",
+                   __func__, next, data_size, buf->prod, buf->cons, rec_size);
         return 0;
     }
     rmb();

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 4 of 4] trace: fix security issues
  2010-07-01 10:12 [PATCH 0 of 4] trace: Miscellaneous fixes George Dunlap
                   ` (2 preceding siblings ...)
  2010-07-01 10:12 ` [PATCH 3 of 4] trace: adjust printk()s George Dunlap
@ 2010-07-01 10:12 ` George Dunlap
  3 siblings, 0 replies; 5+ messages in thread
From: George Dunlap @ 2010-07-01 10:12 UTC (permalink / raw)
  To: xen-devel; +Cc: george.dunlap

After getting a report of 3.2.3's xenmon crashing Xen (as it turned out
this was because c/s 17000 was backported to that tree without also
applying c/s 17515), I figured that the hypervisor shouldn't rely on any
specific state of the actual trace buffer (as it is shared writable with
Dom0)

[GWD: Volatile quantifiers have been taken out and moved to another patch]

To make clear what purpose specific variables have and/or where they
got loaded from, the patch also changes the type of some of them to
be explicitly u32/s32, and removes pointless assertions (like checking
an unsigned variable to be >= 0).

I also took the prototype adjustment of __trace_var() as an opportunity
to simplify the TRACE_xD() macros. Similar simplification could be done
on the (quite numerous) direct callers of the function.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>

diff -r 16ccc9071662 -r ad8596271373 xen/common/trace.c
--- a/xen/common/trace.c	Thu Jul 01 11:09:49 2010 +0100
+++ b/xen/common/trace.c	Thu Jul 01 11:09:49 2010 +0100
@@ -53,12 +53,12 @@
 static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
 static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static int data_size;
+static u32 data_size;
 static u32 t_info_first_offset __read_mostly;
 
 /* High water mark for trace buffers; */
 /* Send virtual interrupt when buffer level reaches this point */
-static int t_buf_highwater;
+static u32 t_buf_highwater;
 
 /* Number of records lost due to per-CPU trace buffer being full. */
 static DEFINE_PER_CPU(unsigned long, lost_records);
@@ -163,7 +163,7 @@
 
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
 
-        buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
+        per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
         buf->cons = buf->prod = 0;
         per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
 
@@ -214,6 +214,7 @@
         spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
         if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
         {
+            per_cpu(t_bufs, cpu) = NULL;
             ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
             free_xenheap_pages(rawbuf, order);
         }
@@ -432,19 +433,37 @@
     return rc;
 }
 
-static inline int calc_rec_size(int cycles, int extra) 
+static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
 {
-    int rec_size;
-    rec_size = 4;
+    unsigned int rec_size = 4;
+
     if ( cycles )
         rec_size += 8;
     rec_size += extra;
     return rec_size;
 }
 
-static inline int calc_unconsumed_bytes(struct t_buf *buf)
+static inline bool_t bogus(u32 prod, u32 cons)
 {
-    int x = buf->prod - buf->cons;
+    if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
+         unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
+    {
+        tb_init_done = 0;
+        printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
+               smp_processor_id(), prod, cons);
+        return 1;
+    }
+    return 0;
+}
+
+static inline u32 calc_unconsumed_bytes(const struct t_buf *buf)
+{
+    u32 prod = buf->prod, cons = buf->cons;
+    s32 x = prod - cons;
+
+    if ( bogus(prod, cons) )
+        return data_size;
+
     if ( x < 0 )
         x += 2*data_size;
 
@@ -454,9 +473,14 @@
     return x;
 }
 
-static inline int calc_bytes_to_wrap(struct t_buf *buf)
+static inline u32 calc_bytes_to_wrap(const struct t_buf *buf)
 {
-    int x = data_size - buf->prod;
+    u32 prod = buf->prod;
+    s32 x = data_size - prod;
+
+    if ( bogus(prod, buf->cons) )
+        return 0;
+
     if ( x <= 0 )
         x += data_size;
 
@@ -466,35 +490,37 @@
     return x;
 }
 
-static inline int calc_bytes_avail(struct t_buf *buf)
+static inline u32 calc_bytes_avail(const struct t_buf *buf)
 {
     return data_size - calc_unconsumed_bytes(buf);
 }
 
-static inline struct t_rec *
-next_record(struct t_buf *buf)
+static inline struct t_rec *next_record(const struct t_buf *buf)
 {
-    int x = buf->prod;
+    u32 x = buf->prod;
+
+    if ( !tb_init_done || bogus(x, buf->cons) )
+        return NULL;
+
     if ( x >= data_size )
         x -= data_size;
 
-    ASSERT(x >= 0);
     ASSERT(x < data_size);
 
     return (struct t_rec *)&this_cpu(t_data)[x];
 }
 
-static inline int __insert_record(struct t_buf *buf,
-                                  unsigned long event,
-                                  int extra,
-                                  int cycles,
-                                  int rec_size,
-                                  unsigned char *extra_data)
+static inline void __insert_record(struct t_buf *buf,
+                                   unsigned long event,
+                                   unsigned int extra,
+                                   bool_t cycles,
+                                   unsigned int rec_size,
+                                   const void *extra_data)
 {
     struct t_rec *rec;
     unsigned char *dst;
-    unsigned long extra_word = extra/sizeof(u32);
-    int local_rec_size = calc_rec_size(cycles, extra);
+    unsigned int extra_word = extra / sizeof(u32);
+    unsigned int local_rec_size = calc_rec_size(cycles, extra);
     uint32_t next;
 
     BUG_ON(local_rec_size != rec_size);
@@ -510,11 +536,13 @@
             printk(XENLOG_WARNING
                    "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n",
                    __func__, next, data_size, buf->prod, buf->cons, rec_size);
-        return 0;
+        return;
     }
     rmb();
 
     rec = next_record(buf);
+    if ( !rec )
+        return;
     rec->event = event;
     rec->extra_u32 = extra_word;
     dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -531,21 +559,22 @@
 
     wmb();
 
-    next = buf->prod + rec_size;
+    next = buf->prod;
+    if ( bogus(next, buf->cons) )
+        return;
+    next += rec_size;
     if ( next >= 2*data_size )
         next -= 2*data_size;
-    ASSERT(next >= 0);
     ASSERT(next < 2*data_size);
     buf->prod = next;
-
-    return rec_size;
 }
 
-static inline int insert_wrap_record(struct t_buf *buf, int size)
+static inline void insert_wrap_record(struct t_buf *buf,
+                                      unsigned int size)
 {
-    int space_left = calc_bytes_to_wrap(buf);
-    unsigned long extra_space = space_left - sizeof(u32);
-    int cycles = 0;
+    u32 space_left = calc_bytes_to_wrap(buf);
+    unsigned int extra_space = space_left - sizeof(u32);
+    bool_t cycles = 0;
 
     BUG_ON(space_left > size);
 
@@ -557,17 +586,13 @@
         ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
     }
 
-    return __insert_record(buf,
-                    TRC_TRACE_WRAP_BUFFER,
-                    extra_space,
-                    cycles,
-                    space_left,
-                    NULL);
+    __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
+                    space_left, NULL);
 }
 
 #define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
 
-static inline int insert_lost_records(struct t_buf *buf)
+static inline void insert_lost_records(struct t_buf *buf)
 {
     struct {
         u32 lost_records;
@@ -582,12 +607,8 @@
 
     this_cpu(lost_records) = 0;
 
-    return __insert_record(buf,
-                           TRC_LOST_RECORDS,
-                           sizeof(ed),
-                           1 /* cycles */,
-                           LOST_REC_SIZE,
-                           (unsigned char *)&ed);
+    __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
+                    LOST_REC_SIZE, &ed);
 }
 
 /*
@@ -609,13 +630,15 @@
  * failure, otherwise 0.  Failure occurs only if the trace buffers are not yet
  * initialised.
  */
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
+void __trace_var(u32 event, bool_t cycles, unsigned int extra,
+                 const void *extra_data)
 {
     struct t_buf *buf;
-    unsigned long flags, bytes_to_tail, bytes_to_wrap;
-    int rec_size, total_size;
-    int extra_word;
-    int started_below_highwater = 0;
+    unsigned long flags;
+    u32 bytes_to_tail, bytes_to_wrap;
+    unsigned int rec_size, total_size;
+    unsigned int extra_word;
+    bool_t started_below_highwater;
 
     if( !tb_init_done )
         return;
@@ -654,7 +677,11 @@
     buf = this_cpu(t_bufs);
 
     if ( unlikely(!buf) )
+    {
+        /* Make gcc happy */
+        started_below_highwater = 0;
         goto unlock;
+    }
 
     started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
 
@@ -735,8 +762,9 @@
     spin_unlock_irqrestore(&this_cpu(t_lock), flags);
 
     /* Notify trace buffer consumer that we've crossed the high water mark. */
-    if ( started_below_highwater &&
-         (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
+    if ( likely(buf!=NULL)
+         && started_below_highwater
+         && (calc_unconsumed_bytes(buf) >= t_buf_highwater) )
         tasklet_schedule(&trace_notify_dom0_tasklet);
 }
 
diff -r 16ccc9071662 -r ad8596271373 xen/include/xen/trace.h
--- a/xen/include/xen/trace.h	Thu Jul 01 11:09:49 2010 +0100
+++ b/xen/include/xen/trace.h	Thu Jul 01 11:09:49 2010 +0100
@@ -36,7 +36,7 @@
 
 int trace_will_trace_event(u32 event);
 
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
 
 static inline void trace_var(u32 event, int cycles, int extra,
                                unsigned char *extra_data)
@@ -57,7 +57,7 @@
         {                                                       \
             u32 _d[1];                                          \
             _d[0] = d1;                                         \
-            __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -68,7 +68,7 @@
             u32 _d[2];                                          \
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -80,7 +80,7 @@
             _d[0] = d1;                                         \
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -93,7 +93,7 @@
             _d[1] = d2;                                         \
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
  
@@ -107,7 +107,7 @@
             _d[2] = d3;                                         \
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )
 
@@ -122,7 +122,7 @@
             _d[3] = d4;                                         \
             _d[4] = d5;                                         \
             _d[5] = d6;                                         \
-            __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
+            __trace_var(_e, 1, sizeof(_d), _d);                 \
         }                                                       \
     } while ( 0 )

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-07-01 10:12 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-07-01 10:12 [PATCH 0 of 4] trace: Miscellaneous fixes George Dunlap
2010-07-01 10:12 ` [PATCH 1 of 4] trace: Fix T_INFO_FIRST_OFFSET calculation George Dunlap
2010-07-01 10:12 ` [PATCH 2 of 4] trace: improve check_tbuf_size() George Dunlap
2010-07-01 10:12 ` [PATCH 3 of 4] trace: adjust printk()s George Dunlap
2010-07-01 10:12 ` [PATCH 4 of 4] trace: fix security issues George Dunlap

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.