* [PATCH] trace security fixes
@ 2010-06-28 10:39 Jan Beulich
2010-06-28 11:12 ` Keir Fraser
2010-06-29 14:21 ` George Dunlap
0 siblings, 2 replies; 6+ messages in thread
From: Jan Beulich @ 2010-06-28 10:39 UTC (permalink / raw)
To: xen-devel
[-- Attachment #1: Type: text/plain, Size: 19234 bytes --]
After getting a report of 3.2.3's xenmon crashing Xen (as it turned out
this was because c/s 17000 was backported to that tree without also
applying c/s 17515), this is the result of reviewing the code for
security issues:
- the hypervisor should not rely on any specific state of the actual
trace buffer (as it is shared writable with Dom0)
- there's no need to share the t_info pages writably (Dom0 only wants
  [and needs] to read them)
- T_INFO_FIRST_OFFSET wasn't defined correctly, which in the
  num_online_cpus() == NR_CPUS case allowed a corrupted MFN to be
  passed to Dom0 (see the first sketch after this list)
- check_tbuf_size() didn't consider the case of the incoming size not
  allowing for the 2*data_size range for t_buf->{prod,cons} (see the
  second sketch after this list)
- printk()s should use a lower log level or be rate limited
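
For the T_INFO_FIRST_OFFSET item, the broken arithmetic can be seen in
a standalone sketch (NR_CPUS == 64 is just an illustrative value; the
t_info layout - a uint16_t tbuf_size followed by NR_CPUS uint16_t
mfn_offset[] entries - is the one the macro describes):

    #include <stdint.h>
    #include <stdio.h>

    #define NR_CPUS 64 /* illustrative value */

    int main(void)
    {
        /* header: one uint16_t tbuf_size + NR_CPUS uint16_t mfn_offset[]
         * entries = (1 + 64) * 2 = 130 bytes, i.e. 33 uint32_t slots */
        unsigned int needed =
            ((1 + NR_CPUS) * sizeof(uint16_t) + 3) / sizeof(uint32_t);
        unsigned int old_off =
            (sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t);
        unsigned int new_off =
            ((2 + NR_CPUS) * sizeof(uint16_t)) / sizeof(uint32_t);

        printf("needed %u, old %u, new %u\n", needed, old_off, new_off);
        /* prints "needed 33, old 32, new 33": with the old macro the
         * first CPU's MFN list starts at byte 128, overlapping
         * mfn_offset[63] (bytes 128-129) - so once the last CPU's offset
         * gets written, the first MFN Dom0 reads is corrupted */
        return 0;
    }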
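
And for the check_tbuf_size() item, a standalone sketch of the two
overflows the new '(size / PAGE_SIZE != pages)' and
'(size + size < size)' terms catch (the PAGE_SIZE value is
illustrative):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u /* illustrative x86 value */

    int main(void)
    {
        /* 0x100000 pages = 4GiB: the u32 byte count itself wraps to 0 */
        uint32_t pages = 0x100000, size = pages * PAGE_SIZE;
        assert(size / PAGE_SIZE != pages);  /* first new term fires */

        /* 0x80000 pages = 2GiB: size fits in a u32, but 2*data_size
         * would not, and prod/cons need the full [0, 2*data_size) range */
        pages = 0x80000;
        size = pages * PAGE_SIZE;
        assert(size / PAGE_SIZE == pages);  /* first term passes... */
        assert(size + size < size);         /* ...the wrap term fires */
        return 0;
    }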
For the first of the items above, I chose to use 'volatile' qualifiers
rather than sprinkling around barriers - this can certainly be changed
if considered unacceptable.
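
As an illustration of the resulting pattern, a stripped-down version of
what calc_unconsumed_bytes() becomes (reduced to the minimum that
compiles; the real code factors the validation into the new bogus()
helper, which additionally shuts tracing down and warns):

    #include <stdint.h>

    typedef uint32_t u32;
    typedef int32_t s32;

    struct t_buf { u32 cons, prod; }; /* header shared writably with Dom0 */

    static u32 data_size; /* set when the buffers are allocated */

    static u32 unconsumed(const volatile struct t_buf *buf)
    {
        /* snapshot the shared fields once; volatile forces real reads */
        u32 prod = buf->prod, cons = buf->cons;
        s32 x = prod - cons;

        /* validate the snapshot before trusting it in any arithmetic */
        if ( ((prod | cons) & 3) || prod >= 2 * data_size ||
             cons >= 2 * data_size )
            return data_size; /* fail safe: report the buffer as full */

        return x < 0 ? x + 2 * data_size : x;
    }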
To make clear what purpose specific variables have and/or where they
got loaded from, the patch also changes the type of some of them to
be explicitly u32/s32, and removes pointless assertions (like checking
that an unsigned variable is >= 0).
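
A quick worked example of the unsigned/signed interplay (the values are
made up; prod and cons always stay within [0, 2*data_size)):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t data_size = 0x10000; /* 64KiB of trace data */
        uint32_t prod = 0x00004;      /* the producer has wrapped ... */
        uint32_t cons = 0x1fffc;      /* ... the consumer has not yet */

        int32_t x = prod - cons; /* u32 subtraction seen as s32: -131064 */
        if ( x < 0 )
            x += 2 * data_size;  /* unwrap into [0, 2*data_size) */

        assert(x == 8); /* 8 bytes produced but not yet consumed */
        return 0;
    }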
I also took the prototype adjustment of __trace_var() as an opportunity
to simplify the TRACE_xD() macros. Similar simplification could be done
on the (quite numerous) direct callers of the function.
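
For illustration, a typical call site would then lose its cast the same
way the macros do (TRC_MY_EVENT and the payload below are made up):

    struct { uint32_t domid, vcpu; } d = { 7, 0 };

    /* before: the unsigned char * prototype forced a cast -
     * __trace_var(TRC_MY_EVENT, 1, sizeof(d), (unsigned char *)&d); */
    __trace_var(TRC_MY_EVENT, 1, sizeof(d), &d); /* const void * now */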
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- 2010-06-15.orig/tools/xenmon/xenbaked.c 2010-06-01 13:39:57.000000000 +0200
+++ 2010-06-15/tools/xenmon/xenbaked.c 2010-06-28 10:23:01.000000000 +0200
@@ -84,7 +84,7 @@ typedef struct settings_st {
} settings_t;
struct t_struct {
- struct t_info *t_info; /* Structure with information about individual buffers */
+ const struct t_info *t_info; /* Structure with information about individual buffers */
struct t_buf **meta; /* Pointers to trace buffer metadata */
unsigned char **data; /* Pointers to trace buffer data areas */
};
@@ -376,9 +376,8 @@ static struct t_struct *map_tbufs(unsign
}
/* Map t_info metadata structure */
- tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
- tinfo_size, PROT_READ | PROT_WRITE,
- tbufs_mfn);
+ tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
+ PROT_READ, tbufs_mfn);
if ( tbufs.t_info == 0 )
{
@@ -404,7 +403,8 @@ static struct t_struct *map_tbufs(unsign
for(i=0; i<num; i++)
{
- uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i];
+ const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
+ + tbufs.t_info->mfn_offset[i];
int j;
xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
--- 2010-06-15.orig/tools/xentrace/xentrace.c 2010-06-01 13:39:57.000000000 +0200
+++ 2010-06-15/tools/xentrace/xentrace.c 2010-06-28 10:23:26.000000000 +0200
@@ -63,7 +63,7 @@ typedef struct settings_st {
} settings_t;
struct t_struct {
- struct t_info *t_info; /* Structure with information about individual buffers */
+ const struct t_info *t_info; /* Structure with information about individual buffers */
struct t_buf **meta; /* Pointers to trace buffer metadata */
unsigned char **data; /* Pointers to trace buffer data areas */
};
@@ -475,9 +475,8 @@ static struct t_struct *map_tbufs(unsign
int i;
/* Map t_info metadata structure */
- tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN,
- tinfo_size, PROT_READ | PROT_WRITE,
- tbufs_mfn);
+ tbufs.t_info = xc_map_foreign_range(xc_handle, DOMID_XEN, tinfo_size,
+ PROT_READ, tbufs_mfn);
if ( tbufs.t_info == 0 )
{
@@ -503,7 +502,8 @@ static struct t_struct *map_tbufs(unsign
for(i=0; i<num; i++)
{
- uint32_t *mfn_list = ((uint32_t *)tbufs.t_info) + tbufs.t_info->mfn_offset[i];
+ const uint32_t *mfn_list = (const uint32_t *)tbufs.t_info
+ + tbufs.t_info->mfn_offset[i];
int j;
xen_pfn_t pfn_list[tbufs.t_info->tbuf_size];
--- 2010-06-15.orig/xen/common/trace.c 2010-06-28 09:43:58.000000000 +0200
+++ 2010-06-15/xen/common/trace.c 2010-06-28 11:58:37.000000000 +0200
@@ -51,15 +51,15 @@ static struct t_info *t_info;
#define T_INFO_PAGES 2 /* Size fixed at 2 pages for now. */
#define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
/* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */
-#define T_INFO_FIRST_OFFSET ((sizeof(int16_t) + NR_CPUS * sizeof(int16_t) + 1) / sizeof(uint32_t))
-static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
+#define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) / sizeof(uint32_t))
+static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static int data_size;
+static u32 data_size;
/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
-static int t_buf_highwater;
+static u32 t_buf_highwater;
/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
@@ -77,11 +77,16 @@ static u32 tb_event_mask = TRC_ALL;
/**
* check_tbuf_size - check to make sure that the proposed size will fit
- * in the currently sized struct t_info.
+ * in the currently sized struct t_info and allows prod and cons to
+ * reach double the value without overflow.
*/
-static inline int check_tbuf_size(int size)
+static int check_tbuf_size(u32 pages)
{
- return (num_online_cpus() * size + T_INFO_FIRST_OFFSET) > (T_INFO_SIZE / sizeof(uint32_t));
+ u32 size = pages * PAGE_SIZE;
+
+ return (size / PAGE_SIZE != pages) || (size + size < size) ||
+ (num_online_cpus() * pages + T_INFO_FIRST_OFFSET >
+ T_INFO_SIZE / sizeof(uint32_t));
}
/**
@@ -115,7 +120,7 @@ static int alloc_trace_bufs(void)
}
t_info->tbuf_size = opt_tbuf_size;
- printk("tbuf_size %d\n", t_info->tbuf_size);
+ printk(XENLOG_INFO "tbuf_size %d\n", t_info->tbuf_size);
nr_pages = opt_tbuf_size;
order = get_order_from_pages(nr_pages);
@@ -139,7 +144,7 @@ static int alloc_trace_bufs(void)
spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
- buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
+ per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
buf->cons = buf->prod = 0;
per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
@@ -171,7 +176,7 @@ static int alloc_trace_bufs(void)
/* Write list first, then write per-cpu offset. */
wmb();
t_info->mfn_offset[cpu]=offset;
- printk("p%d mfn %"PRIx32" offset %d\n",
+ printk(XENLOG_INFO "p%d mfn %"PRIx32" offset %d\n",
cpu, mfn, offset);
offset+=i;
}
@@ -190,6 +195,7 @@ out_dealloc:
spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
{
+ per_cpu(t_bufs, cpu) = NULL;
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
free_xenheap_pages(rawbuf, order);
}
@@ -308,7 +314,7 @@ void __init init_trace_bufs(void)
for(i=0; i<T_INFO_PAGES; i++)
share_xen_page_with_privileged_guests(
- virt_to_page(t_info) + i, XENSHARE_writable);
+ virt_to_page(t_info) + i, XENSHARE_readonly);
if ( opt_tbuf_size == 0 )
{
@@ -404,19 +410,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
return rc;
}
-static inline int calc_rec_size(int cycles, int extra)
+static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
{
- int rec_size;
- rec_size = 4;
+ unsigned int rec_size = 4;
+
if ( cycles )
rec_size += 8;
rec_size += extra;
return rec_size;
}
-static inline int calc_unconsumed_bytes(struct t_buf *buf)
+static inline bool_t bogus(u32 prod, u32 cons)
+{
+ if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
+ unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
+ {
+ tb_init_done = 0;
+ printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
+ smp_processor_id(), prod, cons);
+ return 1;
+ }
+ return 0;
+}
+
+static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf)
{
- int x = buf->prod - buf->cons;
+ u32 prod = buf->prod, cons = buf->cons;
+ s32 x = prod - cons;
+
+ if ( bogus(prod, cons) )
+ return data_size;
+
if ( x < 0 )
x += 2*data_size;
@@ -426,9 +450,14 @@ static inline int calc_unconsumed_bytes(
return x;
}
-static inline int calc_bytes_to_wrap(struct t_buf *buf)
+static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf)
{
- int x = data_size - buf->prod;
+ u32 prod = buf->prod;
+ s32 x = data_size - prod;
+
+ if ( bogus(prod, buf->cons) )
+ return 0;
+
if ( x <= 0 )
x += data_size;
@@ -438,35 +467,37 @@ static inline int calc_bytes_to_wrap(str
return x;
}
-static inline int calc_bytes_avail(struct t_buf *buf)
+static inline u32 calc_bytes_avail(const volatile struct t_buf *buf)
{
return data_size - calc_unconsumed_bytes(buf);
}
-static inline struct t_rec *
-next_record(struct t_buf *buf)
+static inline struct t_rec *next_record(const volatile struct t_buf *buf)
{
- int x = buf->prod;
+ u32 x = buf->prod;
+
+ if ( !tb_init_done || bogus(x, buf->cons) )
+ return NULL;
+
if ( x >= data_size )
x -= data_size;
- ASSERT(x >= 0);
ASSERT(x < data_size);
return (struct t_rec *)&this_cpu(t_data)[x];
}
-static inline int __insert_record(struct t_buf *buf,
- unsigned long event,
- int extra,
- int cycles,
- int rec_size,
- unsigned char *extra_data)
+static inline void __insert_record(volatile struct t_buf *buf,
+ unsigned long event,
+ unsigned int extra,
+ bool_t cycles,
+ unsigned int rec_size,
+ const void *extra_data)
{
struct t_rec *rec;
unsigned char *dst;
- unsigned long extra_word = extra/sizeof(u32);
- int local_rec_size = calc_rec_size(cycles, extra);
+ unsigned int extra_word = extra / sizeof(u32);
+ unsigned int local_rec_size = calc_rec_size(cycles, extra);
uint32_t next;
BUG_ON(local_rec_size != rec_size);
@@ -475,17 +506,20 @@ static inline int __insert_record(struct
/* Double-check once more that we have enough space.
* Don't bugcheck here, in case the userland tool is doing
* something stupid. */
- if ( calc_bytes_avail(buf) < rec_size )
+ next = calc_bytes_avail(buf);
+ if ( next < rec_size )
{
- printk("%s: %u bytes left (%u - ((%u - %u) %% %u) recsize %u.\n",
- __func__,
- calc_bytes_avail(buf),
- data_size, buf->prod, buf->cons, data_size, rec_size);
- return 0;
+ if ( printk_ratelimit() )
+ printk(XENLOG_WARNING
+ "%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n",
+ __func__, next, data_size, buf->prod, buf->cons, rec_size);
+ return;
}
rmb();
rec = next_record(buf);
+ if ( !rec )
+ return;
rec->event = event;
rec->extra_u32 = extra_word;
dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -502,21 +536,22 @@ static inline int __insert_record(struct
wmb();
- next = buf->prod + rec_size;
+ next = buf->prod;
+ if ( bogus(next, buf->cons) )
+ return;
+ next += rec_size;
if ( next >= 2*data_size )
next -= 2*data_size;
- ASSERT(next >= 0);
ASSERT(next < 2*data_size);
buf->prod = next;
-
- return rec_size;
}
-static inline int insert_wrap_record(struct t_buf *buf, int size)
+static inline void insert_wrap_record(volatile struct t_buf *buf,
+ unsigned int size)
{
- int space_left = calc_bytes_to_wrap(buf);
- unsigned long extra_space = space_left - sizeof(u32);
- int cycles = 0;
+ u32 space_left = calc_bytes_to_wrap(buf);
+ unsigned int extra_space = space_left - sizeof(u32);
+ bool_t cycles = 0;
BUG_ON(space_left > size);
@@ -528,17 +563,13 @@ static inline int insert_wrap_record(str
ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
}
- return __insert_record(buf,
- TRC_TRACE_WRAP_BUFFER,
- extra_space,
- cycles,
- space_left,
- NULL);
+ __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
+ space_left, NULL);
}
#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
-static inline int insert_lost_records(struct t_buf *buf)
+static inline void insert_lost_records(volatile struct t_buf *buf)
{
struct {
u32 lost_records;
@@ -553,12 +584,8 @@ static inline int insert_lost_records(st
this_cpu(lost_records) = 0;
- return __insert_record(buf,
- TRC_LOST_RECORDS,
- sizeof(ed),
- 1 /* cycles */,
- LOST_REC_SIZE,
- (unsigned char *)&ed);
+ __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
+ LOST_REC_SIZE, &ed);
}
/*
@@ -580,13 +607,15 @@ static DECLARE_TASKLET(trace_notify_dom0
* failure, otherwise 0. Failure occurs only if the trace buffers are not yet
* initialised.
*/
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
+void __trace_var(u32 event, bool_t cycles, unsigned int extra,
+ const void *extra_data)
{
- struct t_buf *buf;
- unsigned long flags, bytes_to_tail, bytes_to_wrap;
- int rec_size, total_size;
- int extra_word;
- int started_below_highwater = 0;
+ volatile struct t_buf *buf;
+ unsigned long flags;
+ u32 bytes_to_tail, bytes_to_wrap;
+ unsigned int rec_size, total_size;
+ unsigned int extra_word;
+ bool_t started_below_highwater;
if( !tb_init_done )
return;
@@ -619,13 +648,11 @@ void __trace_var(u32 event, int cycles,
/* Read tb_init_done /before/ t_bufs. */
rmb();
-
- spin_lock_irqsave(&this_cpu(t_lock), flags);
-
buf = this_cpu(t_bufs);
-
if ( unlikely(!buf) )
- goto unlock;
+ return;
+
+ spin_lock_irqsave(&this_cpu(t_lock), flags);
started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
--- 2010-06-15.orig/xen/include/xen/trace.h 2010-06-28 09:43:58.000000000 +0200
+++ 2010-06-15/xen/include/xen/trace.h 2010-06-25 12:08:36.000000000 +0200
@@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
int trace_will_trace_event(u32 event);
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
static inline void trace_var(u32 event, int cycles, int extra,
unsigned char *extra_data)
@@ -57,7 +57,7 @@ static inline void trace_var(u32 event,
{ \
u32 _d[1]; \
_d[0] = d1; \
- __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -68,7 +68,7 @@ static inline void trace_var(u32 event,
u32 _d[2]; \
_d[0] = d1; \
_d[1] = d2; \
- __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -80,7 +80,7 @@ static inline void trace_var(u32 event,
_d[0] = d1; \
_d[1] = d2; \
_d[2] = d3; \
- __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -93,7 +93,7 @@ static inline void trace_var(u32 event,
_d[1] = d2; \
_d[2] = d3; \
_d[3] = d4; \
- __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -107,7 +107,7 @@ static inline void trace_var(u32 event,
_d[2] = d3; \
_d[3] = d4; \
_d[4] = d5; \
- __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -122,7 +122,7 @@ static inline void trace_var(u32 event,
_d[3] = d4; \
_d[4] = d5; \
_d[5] = d6; \
- __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
[-- Attachment #2: trace-security.patch --]
[-- Type: text/plain, Size: 19230 bytes --]
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] trace security fixes
2010-06-28 10:39 [PATCH] trace security fixes Jan Beulich
@ 2010-06-28 11:12 ` Keir Fraser
2010-06-28 12:02 ` Jan Beulich
2010-06-29 14:21 ` George Dunlap
1 sibling, 1 reply; 6+ messages in thread
From: Keir Fraser @ 2010-06-28 11:12 UTC (permalink / raw)
To: Jan Beulich, xen-devel@lists.xensource.com; +Cc: George Dunlap
Both of these xentrace patches need an Ack from George Dunlap. Also I will
need to know if they belong in any tree other than xen-unstable.
I would never tolerate use of volatiles rather than proper barriers in
general. But for xentrace I'll leave it up to George.
-- Keir
On 28/06/2010 11:39, "Jan Beulich" <JBeulich@novell.com> wrote:
> [...]
^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] trace security fixes
2010-06-28 11:12 ` Keir Fraser
@ 2010-06-28 12:02 ` Jan Beulich
0 siblings, 0 replies; 6+ messages in thread
From: Jan Beulich @ 2010-06-28 12:02 UTC (permalink / raw)
To: Keir Fraser; +Cc: George Dunlap, xen-devel@lists.xensource.com
>>> On 28.06.10 at 13:12, Keir Fraser <keir.fraser@eu.citrix.com> wrote:
> Both of these xentrace patches need an Ack from George Dunlap. Also I will
> need to know if they belong in any tree other than xen-unstable.
I would see them both as candidates for 4.0.x at least.
Jan
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] trace security fixes
2010-06-28 10:39 [PATCH] trace security fixes Jan Beulich
2010-06-28 11:12 ` Keir Fraser
@ 2010-06-29 14:21 ` George Dunlap
2010-06-29 15:29 ` [PATCH 0/6] trace: miscellaneous fixes Jan Beulich
1 sibling, 1 reply; 6+ messages in thread
From: George Dunlap @ 2010-06-29 14:21 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel
Jan,
Thanks for your work on this. Could you please break this patch down
into its component changes (described in the list below) so that I can
review, discuss, and ack/nack components individually? (And so they
show up as individual changesets when someone's doing archeology...)
So far I agree with the following changes:
> - there's no need to share the t_info pages writably (Dom0 only wants
> [and needs] to read them)
> - T_INFO_FIRST_OFFSET wasn't defined correctly, which in the
> num_online_cpus() == NR_CPUS case allowed a corrupted MFN to be passed
> to Dom0
> - printk()s should use a lower log level or be rate limited
But probably disagree with this one:
> - the hypervisor should not rely on any specific state of the actual
> trace buffer (as it is shared writable with Dom0)
And would like to be able to see the impact of the following more
clearly (i.e., in separate patches):
> - check_tbuf_size() didn't consider the case of the incoming size not
> allowing for the 2*data_size range for t_buf->{prod,cons}
> To make clear what purpose specific variables have and/or where they
> got loaded from, the patch also changes the type of some of them to
> be explicitly u32/s32, and removes pointless assertions (like checking
> an unsigned variable to be >= 0).
>
> I also took the prototype adjustment of __trace_var() as an opportunity
> to simplify the TRACE_xD() macros. Similar simplification could be done
> on the (quite numerous) direct callers of the function.
Thanks again,
-George
> [...]
> + LOST_REC_SIZE, &ed);
> }
>
> /*
> @@ -580,13 +607,15 @@ static DECLARE_TASKLET(trace_notify_dom0
> * failure, otherwise 0. Failure occurs only if the trace buffers are not yet
> * initialised.
> */
> -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
> +void __trace_var(u32 event, bool_t cycles, unsigned int extra,
> + const void *extra_data)
> {
> - struct t_buf *buf;
> - unsigned long flags, bytes_to_tail, bytes_to_wrap;
> - int rec_size, total_size;
> - int extra_word;
> - int started_below_highwater = 0;
> + volatile struct t_buf *buf;
> + unsigned long flags;
> + u32 bytes_to_tail, bytes_to_wrap;
> + unsigned int rec_size, total_size;
> + unsigned int extra_word;
> + bool_t started_below_highwater;
>
> if( !tb_init_done )
> return;
> @@ -619,13 +648,11 @@ void __trace_var(u32 event, int cycles,
>
> /* Read tb_init_done /before/ t_bufs. */
> rmb();
> -
> - spin_lock_irqsave(&this_cpu(t_lock), flags);
> -
> buf = this_cpu(t_bufs);
> -
> if ( unlikely(!buf) )
> - goto unlock;
> + return;
> +
> + spin_lock_irqsave(&this_cpu(t_lock), flags);
>
> started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
>
> --- 2010-06-15.orig/xen/include/xen/trace.h 2010-06-28 09:43:58.000000000 +0200
> +++ 2010-06-15/xen/include/xen/trace.h 2010-06-25 12:08:36.000000000 +0200
> @@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
>
> int trace_will_trace_event(u32 event);
>
> -void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
> +void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
>
> static inline void trace_var(u32 event, int cycles, int extra,
> unsigned char *extra_data)
> @@ -57,7 +57,7 @@ static inline void trace_var(u32 event,
> { \
> u32 _d[1]; \
> _d[0] = d1; \
> - __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
> + __trace_var(_e, 1, sizeof(_d), _d); \
> } \
> } while ( 0 )
>
> @@ -68,7 +68,7 @@ static inline void trace_var(u32 event,
> u32 _d[2]; \
> _d[0] = d1; \
> _d[1] = d2; \
> - __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
> + __trace_var(_e, 1, sizeof(_d), _d); \
> } \
> } while ( 0 )
>
> @@ -80,7 +80,7 @@ static inline void trace_var(u32 event,
> _d[0] = d1; \
> _d[1] = d2; \
> _d[2] = d3; \
> - __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
> + __trace_var(_e, 1, sizeof(_d), _d); \
> } \
> } while ( 0 )
>
> @@ -93,7 +93,7 @@ static inline void trace_var(u32 event,
> _d[1] = d2; \
> _d[2] = d3; \
> _d[3] = d4; \
> - __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
> + __trace_var(_e, 1, sizeof(_d), _d); \
> } \
> } while ( 0 )
>
> @@ -107,7 +107,7 @@ static inline void trace_var(u32 event,
> _d[2] = d3; \
> _d[3] = d4; \
> _d[4] = d5; \
> - __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
> + __trace_var(_e, 1, sizeof(_d), _d); \
> } \
> } while ( 0 )
>
> @@ -122,7 +122,7 @@ static inline void trace_var(u32 event,
> _d[3] = d4; \
> _d[4] = d5; \
> _d[5] = d6; \
> - __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
> + __trace_var(_e, 1, sizeof(_d), _d); \
> } \
> } while ( 0 )
>
* [PATCH 0/6] trace: miscellaneous fixes
2010-06-29 14:21 ` George Dunlap
@ 2010-06-29 15:29 ` Jan Beulich
0 siblings, 0 replies; 6+ messages in thread
From: Jan Beulich @ 2010-06-29 15:29 UTC (permalink / raw)
To: George Dunlap; +Cc: xen-devel
After getting a report of 3.2.3's xenmon crashing Xen (as it turned out
this was because c/s 17000 was backported to that tree without also
applying c/s 17515), this is the result of reviewing the code for
security issues:
- the hypervisor should not rely on any specific state of the actual
trace buffer (as it is shared writable with Dom0)
- there's no need to share writably the t_info pages (Dom0 only wants
[and needs] to read it)
- T_INFO_FIRST_OFFSET wasn't defined correctly, thus allowing in the
num_online_cpus() == NR_CPUS case to pass a corrupted MFN to Dom0
- check_tbuf_size() didn't consider the case of the incoming size not
allowing for the 2*data_size range for t_buf->{prod,cons} (see the
sketch just after this list)
- printk()-s should be lower level or rate limited
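
To illustrate the check_tbuf_size() point, a minimal standalone
sketch of the new overflow checks (PAGE_SIZE and the u32 types as in
the hypervisor; the t_info capacity check is omitted here):

    #include <stdint.h>

    #define PAGE_SIZE 4096u

    static int tbuf_size_bogus(uint32_t pages)
    {
        uint32_t size = pages * PAGE_SIZE;

        /* Dividing back detects wraparound of pages * PAGE_SIZE;
         * size + size < size detects that 2 * data_size would not
         * fit in a u32, which the prod/cons range arithmetic needs. */
        return size / PAGE_SIZE != pages || size + size < size;
    }

For example, pages = 0x80000 yields size = 0x80000000; size + size
wraps to 0, so the old check would have accepted a size for which the
[0, 2*data_size) range of t_buf->{prod,cons} is unrepresentable.
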
For the first of those items, I chose to use 'volatile' qualifiers
rather than sprinkling around barriers - this can certainly be changed
if considered unacceptable.
To make clear what purpose specific variables have and/or where they
got loaded from, the patch also changes the type of some of them to
be explicitly u32/s32, and removes pointless assertions (like checking
an unsigned variable to be >= 0).
I also took the prototype adjustment of __trace_var() as an opportunity
to simplify the TRACE_xD() macros. Similar simplification could be done
on the (quite numerous) direct callers of the function.
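
For reference, the macro change just passes the whole array's size;
e.g. the 3-operand variant now expands along the lines of (sketch):

    u32 _d[3];
    _d[0] = d1; _d[1] = d2; _d[2] = d3;
    /* sizeof(_d) == 3 * sizeof(*_d) == 12 bytes, and the cast to
     * unsigned char * goes away now that __trace_var() takes a
     * const void *. */
    __trace_var(_e, 1, sizeof(_d), _d);
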
Signed-off-by: Jan Beulich <jbeulich@novell.com>
* [PATCH 0/6] trace: miscellaneous fixes
@ 2010-06-29 15:37 Jan Beulich
0 siblings, 0 replies; 6+ messages in thread
From: Jan Beulich @ 2010-06-29 15:37 UTC (permalink / raw)
To: George Dunlap; +Cc: xen-devel
[-- Attachment #1: Type: text/plain, Size: 13366 bytes --]
After getting a report of 3.2.3's xenmon crashing Xen (as it turned out
this was because c/s 17000 was backported to that tree without also
applying c/s 17515), I figured that the hypervisor shouldn't rely on any
specific state of the actual trace buffer (as it is shared writable with
Dom0).
I chose to use 'volatile' qualifiers rather than sprinkling around
barriers - this can certainly be changed if considered unacceptable.
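
A minimal sketch of the resulting access pattern, simplified from the
patch below - each shared field is loaded exactly once into a local
copy, and only the copies are validated and used:

    static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf)
    {
        u32 prod = buf->prod, cons = buf->cons; /* single loads */
        s32 x = prod - cons;

        if ( bogus(prod, cons) )  /* alignment/range check on copies */
            return data_size;     /* treat as full so callers back off */

        if ( x < 0 )
            x += 2 * data_size;
        return x;
    }
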
To make clear what purpose specific variables have and/or where they
got loaded from, the patch also changes the type of some of them to
be explicitly u32/s32, and removes pointless assertions (like checking
an unsigned variable to be >= 0).
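
As a trivial example of the latter, in next_record():

    u32 x = buf->prod;
    ASSERT(x >= 0);        /* tautology: x is unsigned, so this can
                              never fire - dropped by the patch */
    ASSERT(x < data_size); /* this one still carries information */
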
I also took the prototype adjustment of __trace_var() as an opportunity
to simplify the TRACE_xD() macros. Similar simplification could be done
on the (quite numerous) direct callers of the function.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
--- 2010-06-15.orig/xen/common/trace.c 2010-06-29 17:04:45.000000000 +0200
+++ 2010-06-15/xen/common/trace.c 2010-06-29 17:05:09.000000000 +0200
@@ -52,14 +52,14 @@ static struct t_info *t_info;
#define T_INFO_SIZE ((T_INFO_PAGES)*(PAGE_SIZE))
/* t_info.tbuf_size + list of mfn offsets + 1 to round up / sizeof uint32_t */
#define T_INFO_FIRST_OFFSET (((2 + NR_CPUS) * sizeof(uint16_t)) / sizeof(uint32_t))
-static DEFINE_PER_CPU_READ_MOSTLY(struct t_buf *, t_bufs);
+static DEFINE_PER_CPU_READ_MOSTLY(volatile struct t_buf *, t_bufs);
static DEFINE_PER_CPU_READ_MOSTLY(unsigned char *, t_data);
static DEFINE_PER_CPU_READ_MOSTLY(spinlock_t, t_lock);
-static int data_size;
+static u32 data_size;
/* High water mark for trace buffers; */
/* Send virtual interrupt when buffer level reaches this point */
-static int t_buf_highwater;
+static u32 t_buf_highwater;
/* Number of records lost due to per-CPU trace buffer being full. */
static DEFINE_PER_CPU(unsigned long, lost_records);
@@ -145,7 +145,7 @@ static int alloc_trace_bufs(void)
spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
- buf = per_cpu(t_bufs, cpu) = (struct t_buf *)rawbuf;
+ per_cpu(t_bufs, cpu) = buf = (struct t_buf *)rawbuf;
buf->cons = buf->prod = 0;
per_cpu(t_data, cpu) = (unsigned char *)(buf + 1);
@@ -196,6 +196,7 @@ out_dealloc:
spin_lock_irqsave(&per_cpu(t_lock, cpu), flags);
if ( (rawbuf = (char *)per_cpu(t_bufs, cpu)) )
{
+ per_cpu(t_bufs, cpu) = NULL;
ASSERT(!(virt_to_page(rawbuf)->count_info & PGC_allocated));
free_xenheap_pages(rawbuf, order);
}
@@ -410,19 +411,37 @@ int tb_control(xen_sysctl_tbuf_op_t *tbc
return rc;
}
-static inline int calc_rec_size(int cycles, int extra)
+static inline unsigned int calc_rec_size(bool_t cycles, unsigned int extra)
{
- int rec_size;
- rec_size = 4;
+ unsigned int rec_size = 4;
+
if ( cycles )
rec_size += 8;
rec_size += extra;
return rec_size;
}
-static inline int calc_unconsumed_bytes(struct t_buf *buf)
+static inline bool_t bogus(u32 prod, u32 cons)
+{
+ if ( unlikely(prod & 3) || unlikely(prod >= 2 * data_size) ||
+ unlikely(cons & 3) || unlikely(cons >= 2 * data_size) )
+ {
+ tb_init_done = 0;
+ printk(XENLOG_WARNING "trc#%u: bogus prod (%08x) and/or cons (%08x)\n",
+ smp_processor_id(), prod, cons);
+ return 1;
+ }
+ return 0;
+}
+
+static inline u32 calc_unconsumed_bytes(const volatile struct t_buf *buf)
{
- int x = buf->prod - buf->cons;
+ u32 prod = buf->prod, cons = buf->cons;
+ s32 x = prod - cons;
+
+ if ( bogus(prod, cons) )
+ return data_size;
+
if ( x < 0 )
x += 2*data_size;
@@ -432,9 +451,14 @@ static inline int calc_unconsumed_bytes(
return x;
}
-static inline int calc_bytes_to_wrap(struct t_buf *buf)
+static inline u32 calc_bytes_to_wrap(const volatile struct t_buf *buf)
{
- int x = data_size - buf->prod;
+ u32 prod = buf->prod;
+ s32 x = data_size - prod;
+
+ if ( bogus(prod, buf->cons) )
+ return 0;
+
if ( x <= 0 )
x += data_size;
@@ -444,35 +468,37 @@ static inline int calc_bytes_to_wrap(str
return x;
}
-static inline int calc_bytes_avail(struct t_buf *buf)
+static inline u32 calc_bytes_avail(const volatile struct t_buf *buf)
{
return data_size - calc_unconsumed_bytes(buf);
}
-static inline struct t_rec *
-next_record(struct t_buf *buf)
+static inline struct t_rec *next_record(const volatile struct t_buf *buf)
{
- int x = buf->prod;
+ u32 x = buf->prod;
+
+ if ( !tb_init_done || bogus(x, buf->cons) )
+ return NULL;
+
if ( x >= data_size )
x -= data_size;
- ASSERT(x >= 0);
ASSERT(x < data_size);
return (struct t_rec *)&this_cpu(t_data)[x];
}
-static inline int __insert_record(struct t_buf *buf,
- unsigned long event,
- int extra,
- int cycles,
- int rec_size,
- unsigned char *extra_data)
+static inline void __insert_record(volatile struct t_buf *buf,
+ unsigned long event,
+ unsigned int extra,
+ bool_t cycles,
+ unsigned int rec_size,
+ const void *extra_data)
{
struct t_rec *rec;
unsigned char *dst;
- unsigned long extra_word = extra/sizeof(u32);
- int local_rec_size = calc_rec_size(cycles, extra);
+ unsigned int extra_word = extra / sizeof(u32);
+ unsigned int local_rec_size = calc_rec_size(cycles, extra);
uint32_t next;
BUG_ON(local_rec_size != rec_size);
@@ -488,11 +514,13 @@ static inline int __insert_record(struct
printk(XENLOG_WARNING
"%s: avail=%u (size=%08x prod=%08x cons=%08x) rec=%u\n",
__func__, next, data_size, buf->prod, buf->cons, rec_size);
- return 0;
+ return;
}
rmb();
rec = next_record(buf);
+ if ( !rec )
+ return;
rec->event = event;
rec->extra_u32 = extra_word;
dst = (unsigned char *)rec->u.nocycles.extra_u32;
@@ -509,21 +537,22 @@ static inline int __insert_record(struct
wmb();
- next = buf->prod + rec_size;
+ next = buf->prod;
+ if ( bogus(next, buf->cons) )
+ return;
+ next += rec_size;
if ( next >= 2*data_size )
next -= 2*data_size;
- ASSERT(next >= 0);
ASSERT(next < 2*data_size);
buf->prod = next;
-
- return rec_size;
}
-static inline int insert_wrap_record(struct t_buf *buf, int size)
+static inline void insert_wrap_record(volatile struct t_buf *buf,
+ unsigned int size)
{
- int space_left = calc_bytes_to_wrap(buf);
- unsigned long extra_space = space_left - sizeof(u32);
- int cycles = 0;
+ u32 space_left = calc_bytes_to_wrap(buf);
+ unsigned int extra_space = space_left - sizeof(u32);
+ bool_t cycles = 0;
BUG_ON(space_left > size);
@@ -535,17 +564,13 @@ static inline int insert_wrap_record(str
ASSERT((extra_space/sizeof(u32)) <= TRACE_EXTRA_MAX);
}
- return __insert_record(buf,
- TRC_TRACE_WRAP_BUFFER,
- extra_space,
- cycles,
- space_left,
- NULL);
+ __insert_record(buf, TRC_TRACE_WRAP_BUFFER, extra_space, cycles,
+ space_left, NULL);
}
#define LOST_REC_SIZE (4 + 8 + 16) /* header + tsc + sizeof(struct ed) */
-static inline int insert_lost_records(struct t_buf *buf)
+static inline void insert_lost_records(volatile struct t_buf *buf)
{
struct {
u32 lost_records;
@@ -560,12 +585,8 @@ static inline int insert_lost_records(st
this_cpu(lost_records) = 0;
- return __insert_record(buf,
- TRC_LOST_RECORDS,
- sizeof(ed),
- 1 /* cycles */,
- LOST_REC_SIZE,
- (unsigned char *)&ed);
+ __insert_record(buf, TRC_LOST_RECORDS, sizeof(ed), 1 /* cycles */,
+ LOST_REC_SIZE, &ed);
}
/*
@@ -587,13 +608,15 @@ static DECLARE_TASKLET(trace_notify_dom0
* failure, otherwise 0. Failure occurs only if the trace buffers are not yet
* initialised.
*/
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data)
+void __trace_var(u32 event, bool_t cycles, unsigned int extra,
+ const void *extra_data)
{
- struct t_buf *buf;
- unsigned long flags, bytes_to_tail, bytes_to_wrap;
- int rec_size, total_size;
- int extra_word;
- int started_below_highwater = 0;
+ volatile struct t_buf *buf;
+ unsigned long flags;
+ u32 bytes_to_tail, bytes_to_wrap;
+ unsigned int rec_size, total_size;
+ unsigned int extra_word;
+ bool_t started_below_highwater;
if( !tb_init_done )
return;
@@ -626,13 +649,11 @@ void __trace_var(u32 event, int cycles,
/* Read tb_init_done /before/ t_bufs. */
rmb();
-
- spin_lock_irqsave(&this_cpu(t_lock), flags);
-
buf = this_cpu(t_bufs);
-
if ( unlikely(!buf) )
- goto unlock;
+ return;
+
+ spin_lock_irqsave(&this_cpu(t_lock), flags);
started_below_highwater = (calc_unconsumed_bytes(buf) < t_buf_highwater);
--- 2010-06-15.orig/xen/include/xen/trace.h 2010-06-29 16:53:25.000000000 +0200
+++ 2010-06-15/xen/include/xen/trace.h 2010-06-25 12:08:36.000000000 +0200
@@ -36,7 +36,7 @@ int tb_control(struct xen_sysctl_tbuf_op
int trace_will_trace_event(u32 event);
-void __trace_var(u32 event, int cycles, int extra, unsigned char *extra_data);
+void __trace_var(u32 event, bool_t cycles, unsigned int extra, const void *);
static inline void trace_var(u32 event, int cycles, int extra,
unsigned char *extra_data)
@@ -57,7 +57,7 @@ static inline void trace_var(u32 event,
{ \
u32 _d[1]; \
_d[0] = d1; \
- __trace_var(_e, 1, sizeof(*_d), (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -68,7 +68,7 @@ static inline void trace_var(u32 event,
u32 _d[2]; \
_d[0] = d1; \
_d[1] = d2; \
- __trace_var(_e, 1, sizeof(*_d)*2, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -80,7 +80,7 @@ static inline void trace_var(u32 event,
_d[0] = d1; \
_d[1] = d2; \
_d[2] = d3; \
- __trace_var(_e, 1, sizeof(*_d)*3, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -93,7 +93,7 @@ static inline void trace_var(u32 event,
_d[1] = d2; \
_d[2] = d3; \
_d[3] = d4; \
- __trace_var(_e, 1, sizeof(*_d)*4, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -107,7 +107,7 @@ static inline void trace_var(u32 event,
_d[2] = d3; \
_d[3] = d4; \
_d[4] = d5; \
- __trace_var(_e, 1, sizeof(*_d)*5, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
@@ -122,7 +122,7 @@ static inline void trace_var(u32 event,
_d[3] = d4; \
_d[4] = d5; \
_d[5] = d6; \
- __trace_var(_e, 1, sizeof(*_d)*6, (unsigned char *)_d); \
+ __trace_var(_e, 1, sizeof(_d), _d); \
} \
} while ( 0 )
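
As an illustration of the suggested follow-up for direct callers
(hypothetical caller, modeled on the scheduler events; not part of
this patch):

    /* before */
    struct { u32 vcpu; u32 domain; } d;
    d.vcpu = v->vcpu_id;
    d.domain = v->domain->domain_id;
    __trace_var(TRC_SCHED_SLEEP, 1, sizeof(d), (unsigned char *)&d);

    /* after - the cast is no longer needed with the
     * const void * prototype */
    __trace_var(TRC_SCHED_SLEEP, 1, sizeof(d), &d);
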
[-- Attachment #3: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel