diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index cc48d3f..0341721 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -80,6 +80,26 @@ static void exit_sysfs(void) #define exit_sysfs() do { } while (0) #endif /* CONFIG_PM */ +static void nmi_cpu_switch(void *dummy) +{ + struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + model->switch_ctrs(msrs); +} + +static int nmi_switch_event(void) +{ + /* Check CPU 0 should be sufficient */ + struct op_msrs const *msrs = &per_cpu(cpu_msrs, 0); + + if (model->check_multiplexing(msrs) < 0) + return -EINVAL; + + spin_lock(&oprofilefs_lock); + on_each_cpu(nmi_cpu_switch, NULL, 0, 1); + spin_unlock(&oprofilefs_lock); + return 0; +} + static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -326,6 +346,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); + counter_config[i].save_count_low = 0; } return 0; @@ -455,6 +476,7 @@ int __init op_nmi_init(struct oprofile_operations *ops) ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; + ops->switch_events = nmi_switch_event; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; } diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 2880b15..786d6e0 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,13 +10,14 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. 
*/ struct op_counter_config { unsigned long count; + unsigned long save_count_low; unsigned long enabled; unsigned long event; unsigned long kernel; diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c index 3d53487..4e35ee1 100644 --- a/arch/x86/oprofile/op_model_athlon.c +++ b/arch/x86/oprofile/op_model_athlon.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -18,8 +19,10 @@ #include "op_x86_model.h" #include "op_counter.h" -#define NUM_COUNTERS 4 -#define NUM_CONTROLS 4 +#define NUM_COUNTERS 32 +#define NUM_HARDWARE_COUNTERS 4 +#define NUM_CONTROLS 32 +#define NUM_HARDWARE_CONTROLS 4 #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) @@ -43,21 +46,26 @@ #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) static unsigned long reset_value[NUM_COUNTERS]; +DEFINE_PER_CPU(int, switch_index); +spinlock_t perfctr_lock; + static void athlon_fill_in_addresses(struct op_msrs * const msrs) { int i; for (i = 0; i < NUM_COUNTERS; i++) { - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; + int hw_counter = i % NUM_HARDWARE_COUNTERS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + hw_counter)) + msrs->counters[i].addr = MSR_K7_PERFCTR0 + hw_counter; else msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { - if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) - msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; + int hw_control = i % NUM_HARDWARE_CONTROLS; + if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + hw_control)) + msrs->controls[i].addr = MSR_K7_EVNTSEL0 + hw_control; else msrs->controls[i].addr = 0; } @@ -69,8 +77,15 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) unsigned int low, high; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { + if (counter_config[i].enabled) + reset_value[i] = counter_config[i].count; + else + reset_value[i] = 0; + } + 
/* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { + for (i = 0 ; i < NUM_HARDWARE_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; CTRL_READ(low, high, msrs, i); @@ -80,14 +95,14 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) } /* avoid a false detection of ctr overflows in NMI handler */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) continue; CTR_WRITE(1, msrs, i); } /* enable active counters */ - for (i = 0; i < NUM_COUNTERS; ++i) { + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; @@ -106,29 +121,49 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs) CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; } } } +/* + * Quick check to see if multiplexing is necessary. + * The check should be efficient since counters are used + * in order. 
+ */ +static int athlon_check_multiplexing(struct op_msrs const * const msrs) +{ + int ret = 0; + + if (!counter_config[NUM_HARDWARE_COUNTERS].count) + ret = -EINVAL; + + return ret; +} + + static int athlon_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) + spin_lock(&perfctr_lock); + //printk("*** athlon_check_ctrs start ***\n"); + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (!reset_value[offset]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); + oprofile_add_sample(regs, offset); + CTR_WRITE(reset_value[offset], msrs, i); } } + //printk("*** athlon_check_ctrs end ***\n"); + spin_unlock(&perfctr_lock); + /* See op_model_ppro.c */ return 1; } @@ -138,13 +173,14 @@ static void athlon_start(struct op_msrs const * const msrs) { unsigned int low, high; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, high, msrs, i); } } + __get_cpu_var(switch_index) = 0; } @@ -153,26 +189,90 @@ static void athlon_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; + spin_lock(&perfctr_lock); + /* Subtle: stop on all counters to avoid race with * setting our pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); CTRL_WRITE(low, high, msrs, i); } + + spin_unlock(&perfctr_lock); } + +static void athlon_switch_ctrs(struct op_msrs const * const msrs) +{ + unsigned int low, high; + int i, s = per_cpu(switch_index, smp_processor_id()); + + + //printk("### 
athlon_check_ctrs starts ###/n"); + + athlon_stop(msrs); + + + spin_lock(&perfctr_lock); + /* save the current hw counts */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + s; + if (!reset_value[offset]) + continue; + CTR_READ(low, high, msrs, i); + /* convert counter value to actual count, assume high = -1 */ + counter_config[offset].save_count_low = (unsigned int)-1 - low - 1; + } + + + /* move to next eventset */ + s += NUM_HARDWARE_COUNTERS; + if ((s > NUM_HARDWARE_COUNTERS) || (counter_config[s].count == 0)) { + per_cpu(switch_index, smp_processor_id()) = 0; + s = 0; + } else + per_cpu(switch_index, smp_processor_id()) = s; + + /* enable next active counters */ + for (i = 0; i < NUM_HARDWARE_COUNTERS; ++i) { + int offset = i + s; + if ((counter_config[offset].enabled) && (CTR_IS_RESERVED(msrs,i))) { + if (unlikely(!counter_config[offset].save_count_low)) + counter_config[offset].save_count_low = counter_config[offset].count; + CTR_WRITE(counter_config[offset].save_count_low, msrs, i); + CTRL_READ(low, high, msrs, i); + CTRL_CLEAR_LO(low); + CTRL_CLEAR_HI(high); + CTRL_SET_ENABLE(low); + CTRL_SET_USR(low, counter_config[offset].user); + CTRL_SET_KERN(low, counter_config[offset].kernel); + CTRL_SET_UM(low, counter_config[offset].unit_mask); + CTRL_SET_EVENT_LOW(low, counter_config[offset].event); + CTRL_SET_EVENT_HIGH(high, counter_config[offset].event); + CTRL_SET_HOST_ONLY(high, 0); + CTRL_SET_GUEST_ONLY(high, 0); + CTRL_SET_ACTIVE(low); + CTRL_WRITE(low, high, msrs, i); + } + } + + //printk("### athlon_check_ctrs ends ###/n"); + spin_unlock(&perfctr_lock); +} + + static void athlon_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { + for (i = 0 ; i < NUM_HARDWARE_COUNTERS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) 
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -186,5 +286,7 @@ struct op_x86_model_spec const op_athlon_spec = { .check_ctrs = &athlon_check_ctrs, .start = &athlon_start, .stop = &athlon_stop, - .shutdown = &athlon_shutdown + .shutdown = &athlon_shutdown, + .switch_ctrs = &athlon_switch_ctrs, + .check_multiplexing = &athlon_check_multiplexing }; diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 45b605f..45003c2 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -41,6 +41,8 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); + void (*switch_ctrs)(struct op_msrs const * const msrs); + int (*check_multiplexing)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec const op_ppro_spec; diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 2c64517..3ebd8d2 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "oprof.h" @@ -24,6 +25,9 @@ struct oprofile_operations oprofile_ops; unsigned long oprofile_started; unsigned long backtrace_depth; +/* Multiplexing time slice in useconds (fed to usecs_to_jiffies()); the initializer is 5 * USEC_PER_SEC, i.e. 5 seconds, not 5000 useconds. NOTE(review): confirm the intended default. */ +unsigned long time_slice = 5 * USEC_PER_SEC ; +struct timer_list switch_timer; static unsigned long is_setup; static DEFINE_MUTEX(start_mutex); @@ -87,6 +91,17 @@ out: return err; } +static void start_switch_timer(void) +{ + switch_timer.expires = jiffies + usecs_to_jiffies(time_slice); + add_timer(&switch_timer); +} + +static void switch_interrupt(unsigned long ptr) +{ + if (!oprofile_ops.switch_events()) + start_switch_timer(); +} /* Actually start profiling (echo 1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -94,7 +109,6 @@ int oprofile_start(void) int err = -EINVAL; mutex_lock(&start_mutex); - if (!is_setup) goto out; @@ -108,6 +122,9 @@ int 
oprofile_start(void) if ((err = oprofile_ops.start())) goto out; + if (oprofile_ops.switch_events) + start_switch_timer(); + oprofile_started = 1; out: mutex_unlock(&start_mutex); @@ -123,6 +140,7 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + del_timer_sync(&switch_timer); /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -155,6 +173,29 @@ post_sync: mutex_unlock(&start_mutex); } +int oprofile_set_time_slice(unsigned long val) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + time_slice = val; + +out: + mutex_unlock(&start_mutex); + return err; + +} int oprofile_set_backtrace(unsigned long val) { @@ -179,10 +220,18 @@ out: return err; } +static void __init oprofile_switch_timer_init(void) +{ + init_timer(&switch_timer); + switch_timer.function = switch_interrupt; + switch_timer.data = 0; +} + static int __init oprofile_init(void) { int err; + oprofile_switch_timer_init(); err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { @@ -191,8 +240,10 @@ static int __init oprofile_init(void) } err = oprofilefs_register(); - if (err) + if (err) { + del_timer_sync(&switch_timer); oprofile_arch_exit(); + } return err; } diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 1832365..fc3a2bd 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -27,7 +27,8 @@ extern unsigned long fs_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long backtrace_depth; - +extern unsigned long time_slice; + struct super_block; struct dentry; @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root); void oprofile_timer_init(struct oprofile_operations * ops); int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_time_slice(unsigned long 
time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index ef953ba..25c78dd 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -18,6 +18,37 @@ unsigned long fs_buffer_size = 131072; unsigned long fs_cpu_buffer_size = 8192; unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ +static ssize_t time_slice_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +{ + return oprofilefs_ulong_to_user(time_slice, buf, count, offset); +} + + +static ssize_t time_slice_write(struct file * file, char const __user * buf, size_t count, loff_t * offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = oprofile_set_time_slice(val); + + if (retval) + return retval; + return count; +} + +static const struct file_operations time_slice_fops = { + .read = time_slice_read, + .write = time_slice_write +}; + + static ssize_t depth_read(struct file * file, char __user * buf, size_t count, loff_t * offset) { return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); @@ -85,11 +116,10 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t if (*offset) return -EINVAL; - retval = oprofilefs_ulong_from_user(&val, buf, count); if (retval) return retval; - + if (val) retval = oprofile_start(); else @@ -129,6 +159,7 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); + oprofilefs_create_file(sb, root, "time_slice", &time_slice_fops); oprofile_create_stats_files(sb, root); if (oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/include/linux/oprofile.h 
b/include/linux/oprofile.h index 041bb31..6c764cb 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -65,6 +65,9 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optional. A return of 0 re-arms the switch timer. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; };