From: Andrew Theurer <habanero@us.ibm.com>
To: xen-devel@lists.xensource.com
Subject: [PATCH] xenoprofile x86_64
Date: Mon, 22 Aug 2005 10:54:48 -0500 [thread overview]
Message-ID: <4309F548.5020002@us.ibm.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 573 bytes --]
Attached are patches for xenoprofile on x86_64. These are not
"production ready", but they do work on EM64T so far. I have not added
support for Opteron just yet (but will very soon). I wanted to get these
out ASAP in case anyone wanted to try them. There are not too many
changes from Renato's patches, mainly use of KERNEL_MODE instead of
RING_1, u64's here and there, and new x86_64 specific files. I have not
tested these patches on i386 (some changes needed). These should apply
on changeset 6315.
-Andrew
Signed-off-by: Andrew Theurer <habanero@us.ibm.com>
[-- Attachment #2: xenoprof-1.2-x86_64-xen.patch --]
[-- Type: text/plain, Size: 61825 bytes --]
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/Makefile ./xen/arch/x86/Makefile
--- ../xen-unstable.hg-6251/xen/arch/x86/Makefile 2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/Makefile 2005-08-18 20:28:44 -05:00
@@ -33,7 +33,10 @@ ifneq ($(crash_debug),y)
OBJS := $(patsubst cdb%.o,,$(OBJS))
endif
+OBJS += oprofile/oprofile.o
+
+# Recurse with $(MAKE) (not a literal "make") so that the make program in use,
+# its command-line flags and the jobserver are inherited by the sub-make.
default: $(TARGET)
+ $(MAKE) -C oprofile
$(TARGET): $(TARGET)-syms boot/mkelf32
./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000
@@ -60,6 +63,9 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off
boot/mkelf32: boot/mkelf32.c
$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
+# Build the combined oprofile object by delegating to the subdirectory Makefile.
+# NOTE(review): no prerequisites are listed, so make only runs this recipe
+# while oprofile/oprofile.o does not exist -- confirm that is intended.
+oprofile/oprofile.o:
+ $(MAKE) -C oprofile
+
clean:
rm -f *.o *.s *~ core boot/*.o boot/*~ boot/core boot/mkelf32
rm -f x86_32/*.o x86_32/*~ x86_32/core
@@ -68,5 +74,6 @@ clean:
rm -f acpi/*.o acpi/*~ acpi/core
rm -f genapic/*.o genapic/*~ genapic/core
rm -f cpu/*.o cpu/*~ cpu/core
+ rm -f oprofile/*.o
.PHONY: default clean
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/nmi.c ./xen/arch/x86/nmi.c
--- ../xen-unstable.hg-6251/xen/arch/x86/nmi.c 2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/nmi.c 2005-08-18 20:28:44 -05:00
@@ -5,6 +5,10 @@
*
* Started by Ingo Molnar <mingo@redhat.com>
*
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
* Fixes:
* Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
* Mikael Pettersson : Power Management for local APIC NMI watchdog.
@@ -35,6 +39,28 @@ static unsigned int nmi_p4_cccr_val;
static struct ac_timer nmi_timer[NR_CPUS];
static unsigned int nmi_timer_ticks[NR_CPUS];
+/*
+ * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
+ * - it may be reserved by some other driver, or not
+ * - when not reserved by some other driver, it may be used for
+ * the NMI watchdog, or not
+ *
+ * This is maintained separately from nmi_active because the NMI
+ * watchdog may also be driven from the I/O APIC timer.
+ *
+ * reserve_lapic_nmi() and release_lapic_nmi() update it while holding
+ * lapic_nmi_owner_lock.
+ */
+static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int lapic_nmi_owner;
+/* Stored by setup_apic_nmi_watchdog(): the watchdog uses the lapic NMI. */
+#define LAPIC_NMI_WATCHDOG (1<<0)
+/* Set by reserve_lapic_nmi() and cleared again by release_lapic_nmi(). */
+#define LAPIC_NMI_RESERVED (1<<1)
+
+/* nmi_active:
+ * +1: the lapic NMI watchdog is active, but can be disabled
+ * 0: the lapic NMI watchdog has not been set up, and cannot
+ * be enabled
+ * -1: the lapic NMI watchdog is disabled, but can be enabled
+ */
+int nmi_active;
+
#define K7_EVNTSEL_ENABLE (1 << 22)
#define K7_EVNTSEL_INT (1 << 20)
#define K7_EVNTSEL_OS (1 << 17)
@@ -66,8 +92,6 @@ static unsigned int nmi_timer_ticks[NR_C
* max threshold. [IA32-Vol3, Section 14.9.9]
*/
#define MSR_P4_IQ_COUNTER0 0x30C
-#define MSR_P4_IQ_CCCR0 0x36C
-#define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */
#define P4_NMI_CRU_ESCR0 P4_ESCR_EVENT_SELECT(0x3F)
#define P4_NMI_IQ_CCCR0 \
(P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
@@ -124,6 +148,70 @@ static inline void nmi_pm_init(void) { }
* Original code written by Keith Owens.
*/
+/*
+ * Stop the performance-counter driven NMI watchdog.
+ *
+ * Does nothing unless nmi_active is positive.  Otherwise the event-select
+ * MSR (or, on Intel family 15, the CCCR/ESCR pair) matching the boot CPU's
+ * vendor and family is zeroed with wrmsr on the CPU executing this function,
+ * nmi_active becomes -1 and nmi_watchdog is cleared.
+ */
+static void disable_lapic_nmi_watchdog(void)
+{
+    if (nmi_active <= 0)
+        return;
+
+    switch (boot_cpu_data.x86_vendor) {
+    case X86_VENDOR_INTEL:
+        switch (boot_cpu_data.x86) {
+        case 15:
+            /* A non-first sibling thread owns IQ_CCCR1; everyone else
+             * owns IQ_CCCR0 together with CRU_ESCR0. */
+            if ( (smp_num_siblings > 1) &&
+                 ( (smp_processor_id() % smp_num_siblings) != 0) )
+            {
+                wrmsr(MSR_P4_IQ_CCCR1, 0, 0);
+            } else {
+                wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
+                wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+            }
+            break;
+        case 6:
+            wrmsr(MSR_P6_EVNTSEL0, 0, 0);
+            break;
+        }
+        break;
+    case X86_VENDOR_AMD:
+        wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+        break;
+    }
+
+    nmi_active = -1;
+    /* tell do_nmi() and others that we're not active any more */
+    nmi_watchdog = 0;
+}
+
+/*
+ * Re-arm the lapic NMI watchdog, but only when it is currently in the
+ * "disabled, can be enabled" state (nmi_active < 0).
+ */
+static void enable_lapic_nmi_watchdog(void)
+{
+    if (nmi_active >= 0)
+        return;
+
+    nmi_watchdog = NMI_LOCAL_APIC;
+    setup_apic_nmi_watchdog();
+}
+
+/*
+ * Claim the lapic NMI hardware for a driver other than the watchdog.
+ *
+ * Returns -EBUSY when it was already reserved, 0 otherwise.  If the
+ * watchdog was using the lapic NMI it is disabled before returning 0.
+ */
+int reserve_lapic_nmi(void)
+{
+    unsigned int prev;
+
+    /* Sample the previous owner bits and mark the reservation atomically. */
+    spin_lock(&lapic_nmi_owner_lock);
+    prev = lapic_nmi_owner;
+    lapic_nmi_owner = prev | LAPIC_NMI_RESERVED;
+    spin_unlock(&lapic_nmi_owner_lock);
+
+    if (prev & LAPIC_NMI_RESERVED)
+        return -EBUSY;
+
+    if (prev & LAPIC_NMI_WATCHDOG)
+        disable_lapic_nmi_watchdog();
+
+    return 0;
+}
+
+/*
+ * Drop the reservation taken by reserve_lapic_nmi() and, if the watchdog
+ * bit is still set in lapic_nmi_owner, turn the watchdog back on.
+ */
+void release_lapic_nmi(void)
+{
+    unsigned int remaining;
+
+    spin_lock(&lapic_nmi_owner_lock);
+    lapic_nmi_owner &= ~LAPIC_NMI_RESERVED;
+    remaining = lapic_nmi_owner;
+    spin_unlock(&lapic_nmi_owner_lock);
+
+    if (remaining & LAPIC_NMI_WATCHDOG)
+        enable_lapic_nmi_watchdog();
+}
+
static void __pminit clear_msr_range(unsigned int base, unsigned int n)
{
unsigned int i;
@@ -241,6 +329,9 @@ void __pminit setup_apic_nmi_watchdog(vo
init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
+ lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
+ nmi_active = 1;
+
nmi_pm_init();
}
@@ -337,3 +428,7 @@ void nmi_watchdog_tick(struct cpu_user_r
wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
}
}
+
+EXPORT_SYMBOL(reserve_lapic_nmi);
+EXPORT_SYMBOL(release_lapic_nmi);
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile ./xen/arch/x86/oprofile/Makefile
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/Makefile 2005-08-18 20:28:44 -05:00
@@ -0,0 +1,9 @@
+
+# NOTE(review): OBJS, HDRS, CC, LD and the *FLAGS variables are presumably
+# provided by Rules.mk -- confirm.
+include $(BASEDIR)/Rules.mk
+
+# Partially link (-r) every object in OBJS into one relocatable oprofile.o.
+default: $(OBJS)
+ $(LD) $(LDFLAGS) -r -o oprofile.o $(OBJS)
+
+# Rebuild an object when its source, any header in HDRS, or this Makefile changes.
+%.o: %.c $(HDRS) Makefile
+ $(CC) $(CFLAGS) -c $< -o $@
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c ./xen/arch/x86/oprofile/nmi_int.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/nmi_int.c 2005-08-19 19:32:01 -05:00
@@ -0,0 +1,444 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/event.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <public/xen.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+#include <xen/delay.h>
+
+#include "op_counter.h"
+#include "op_x86_model.h"
+
+/* CPU-model specific operations; selected by p4_init() / ppro_init(). */
+static struct op_x86_model_spec const * model;
+/* Counter/control MSR tables, indexed by CPU number. */
+static struct op_msrs cpu_msrs[NR_CPUS];
+/* APIC_LVTPC value saved by nmi_cpu_start() and restored by nmi_cpu_stop(). */
+static unsigned long saved_lvtpc[NR_CPUS];
+
+/* Number of words in virq_ovf_pending[]. */
+#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1)
+
+/* The objects and helpers below are defined outside this file. */
+extern int active_domains[MAX_OPROF_DOMAINS];
+extern unsigned int adomains;
+
+extern struct domain * primary_profiler;
+extern struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+/* Bit N set: an overflow VIRQ is still owed to adomain_ptrs[N]. */
+extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+extern int is_active(struct domain *d);
+extern int active_id(struct domain *d);
+extern int is_passive(struct domain *d);
+extern int is_profiled(struct domain *d);
+
+
+/* Set to 1 by nmi_start() and back to 0 by nmi_stop(). */
+int nmi_profiling_started = 0;
+
+/* Statistics updated by nmi_callback(); nmi_stop() zeroes all of them
+ * except other_id. */
+int active_virq_count = 0;
+int passive_virq_count = 0;
+int other_virq_count = 0;
+int other_id = -1;
+int xen_count = 0;
+int dom_count = 0;
+/* Result of the most recent model->check_ctrs() call in nmi_callback();
+ * also read by nmi_sanity_check(). */
+int ovf = 0;
+
+/*
+ * NMI handler installed by nmi_enable_virq(); nmi_sanity_check() calls it
+ * directly as well.
+ *
+ * Asks the CPU model to check its counters and stores the result in the
+ * global `ovf`.  When an overflow is reported, the sample is counted as a
+ * Xen or a domain sample and attributed to the active / passive / other
+ * category of the current domain.  For active and passive domains an
+ * overflow VIRQ is either sent right away or recorded in virq_ovf_pending
+ * for nmi_sanity_check() to deliver later.
+ *
+ * Always returns 1.
+ */
+int nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+ int xen_mode = 0;
+
+ ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
+ /* presumably non-zero when the NMI interrupted ring-0 (Xen) code --
+ see RING_0() */
+ xen_mode = RING_0(regs);
+ if (ovf) {
+ if (xen_mode)
+ xen_count++;
+ else
+ dom_count++;
+
+ if (is_active(current->domain)) {
+ /* This is slightly incorrect. If we do not deliver
+ OVF virtual interrupts in a synchronous
+ manner, a process switch may happen in the domain
+ between the point the sample was collected and
+ the point at which a VIRQ was delivered. However,
+ it is not safe to call send_guest_virq from this
+ NMI context, it may lead to a deadlock since NMIs are
+ unmaskable. One optimization that we can do is
+ that if the sample occurs while domain code is
+ running, we know that it is safe to call
+ send_guest_virq, since we know no Xen code
+ is running at that time.
+ However, this may distort the sample distribution,
+ because we may lose more Xen mode samples.*/
+ active_virq_count++;
+ if (!xen_mode) {
+ send_guest_virq(current, VIRQ_PMC_OVF);
+ clear_bit(active_id(current->domain), &virq_ovf_pending[0]);
+ } else
+ set_bit(active_id(current->domain), &virq_ovf_pending[0]);
+ primary_profiler->shared_info->active_samples++;
+ }
+ else if (is_passive(current->domain)) {
+ /* samples of a passive domain are signalled to the primary
+ profiler, always deferred */
+ set_bit(active_id(primary_profiler), &virq_ovf_pending[0]);
+ passive_virq_count++;
+ primary_profiler->shared_info->passive_samples++;
+ }
+ else {
+ /* neither active nor passive: only counted, no VIRQ */
+ other_virq_count++;
+ other_id = current->domain->domain_id;
+ primary_profiler->shared_info->other_samples++;
+ }
+ }
+ return 1;
+}
+
+/*
+ * Release the counter and control tables of every cpu_msrs[] slot and
+ * reset the pointers to NULL.
+ */
+static void free_msrs(void)
+{
+    struct op_msrs *slot;
+
+    for (slot = &cpu_msrs[0]; slot < &cpu_msrs[NR_CPUS]; ++slot) {
+        xfree(slot->counters);
+        slot->counters = NULL;
+        xfree(slot->controls);
+        slot->controls = NULL;
+    }
+}
+
+/*
+ * Allocate the counter and control tables for every CPU whose bit is set
+ * in cpu_online_map, sized from the selected model.
+ *
+ * Returns 1 on success.  On the first failed allocation everything is
+ * released again through free_msrs() and 0 is returned.
+ */
+static int allocate_msrs(void)
+{
+    size_t ctl_bytes = sizeof(struct op_msr) * model->num_controls;
+    size_t ctr_bytes = sizeof(struct op_msr) * model->num_counters;
+    int cpu;
+
+    for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+        if (!test_bit(cpu, &cpu_online_map))
+            continue;
+
+        cpu_msrs[cpu].counters = xmalloc_bytes(ctr_bytes);
+        if (!cpu_msrs[cpu].counters)
+            goto fail;
+
+        cpu_msrs[cpu].controls = xmalloc_bytes(ctl_bytes);
+        if (!cpu_msrs[cpu].controls)
+            goto fail;
+    }
+    return 1;
+
+fail:
+    free_msrs();
+    return 0;
+}
+
+/*
+ * Read every counter MSR and then every control MSR listed in msrs and
+ * store the low/high halves in the entry's `saved` field.
+ */
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+    unsigned int const counter_cnt = model->num_counters;
+    unsigned int const control_cnt = model->num_controls;
+    unsigned int idx;
+
+    for (idx = 0; idx < counter_cnt; ++idx) {
+        struct op_msr * const reg = &msrs->counters[idx];
+        rdmsr(reg->addr, reg->saved.low, reg->saved.high);
+    }
+
+    for (idx = 0; idx < control_cnt; ++idx) {
+        struct op_msr * const reg = &msrs->controls[idx];
+        rdmsr(reg->addr, reg->saved.low, reg->saved.high);
+    }
+}
+
+/*
+ * Per-CPU callback (see nmi_reserve_counters()): let the model fill in the
+ * MSR addresses for this CPU, then save their current contents.
+ * `dummy` is unused.
+ */
+static void nmi_save_registers(void * dummy)
+{
+    struct op_msrs * const mine = &cpu_msrs[smp_processor_id()];
+
+    model->fill_in_addresses(mine);
+    nmi_cpu_save_registers(mine);
+}
+
+/*
+ * Allocate the per-CPU MSR tables, take ownership of the lapic NMI and
+ * save the current MSR contents on every CPU.
+ *
+ * Returns 0 on success, -ENOMEM when the tables cannot be allocated and
+ * -EBUSY when the lapic NMI is already reserved (the tables are freed
+ * again in that case).
+ */
+int nmi_reserve_counters(void)
+{
+    if (allocate_msrs() == 0)
+        return -ENOMEM;
+
+    /* This is delicate: our NMI handler has to be installed without
+     * actually triggering any NMIs, as that would badly break the
+     * core code.
+     */
+    /* Don't we need to do this on all CPUs?*/
+    if (reserve_lapic_nmi() >= 0) {
+        /* We need to serialize save and setup for HT because the subset
+         * of msrs are distinct for save and setup operations
+         */
+        on_each_cpu(nmi_save_registers, NULL, 0, 1);
+        return 0;
+    }
+
+    free_msrs();
+    return -EBUSY;
+}
+
+/*
+ * Per-CPU callback: program the counters of the executing CPU through the
+ * model's setup_ctrs hook.  `dummy` is unused.
+ */
+static void nmi_cpu_setup(void * dummy)
+{
+    model->setup_ctrs(&cpu_msrs[smp_processor_id()]);
+}
+
+/* Run nmi_cpu_setup() through on_each_cpu().  Always returns 0. */
+int nmi_setup_events(void)
+{
+    on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+
+    return 0;
+}
+
+/*
+ * Install nmi_callback() as the NMI callback.  Always returns 0.
+ *
+ * Declared with (void) so the definition is a real prototype, matching
+ * nmi_disable_virq(void).
+ */
+int nmi_enable_virq(void)
+{
+    set_nmi_callback(nmi_callback);
+    return 0;
+}
+
+/*
+ * Per-CPU callback: remember the current APIC_LVTPC value, switch the
+ * entry to NMI delivery and start the counters through the model.
+ * `dummy` is unused.
+ */
+static void nmi_cpu_start(void * dummy)
+{
+    int const self = smp_processor_id();
+
+    saved_lvtpc[self] = apic_read(APIC_LVTPC);
+    apic_write(APIC_LVTPC, APIC_DM_NMI);
+    model->start(&cpu_msrs[self]);
+}
+
+/*
+ * Run nmi_cpu_start() through on_each_cpu() and then flag profiling as
+ * started.  Always returns 0.
+ */
+int nmi_start(void)
+{
+    on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+    nmi_profiling_started = 1;
+
+    return 0;
+}
+
+/*
+ * Per-CPU callback: stop the counters through the model and restore the
+ * APIC_LVTPC value saved by nmi_cpu_start().  `dummy` is unused.
+ */
+static void nmi_cpu_stop(void * dummy)
+{
+    unsigned int v;
+    unsigned long lvtpc;
+    int cpu = smp_processor_id();
+    struct op_msrs const * msrs = &cpu_msrs[cpu];
+
+    model->stop(msrs);
+
+    /* Diagnostic: the LVTPC entry is expected to still have the NMI
+     * delivery-mode bit set and to be unmasked.  If not, report the raw
+     * value and stall via mdelay(5000).
+     */
+    lvtpc = apic_read(APIC_LVTPC);
+    if (!(lvtpc & APIC_DM_NMI) || (lvtpc & APIC_LVT_MASKED)) {
+        /* "%lu", not "%ul": the latter prints the number followed by a
+         * literal 'l'. */
+        printk("nmi_stop: APIC not good %lu\n", lvtpc);
+        mdelay(5000);
+    }
+
+    /* restoring APIC_LVTPC can trigger an apic error because the delivery
+     * mode and vector nr combination can be illegal. That's by design: on
+     * power on apic lvt contain a zero vector nr which are legal only for
+     * NMI delivery mode. So inhibit apic err before restoring lvtpc
+     */
+    v = apic_read(APIC_LVTERR);
+    apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+    apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+    apic_write(APIC_LVTERR, v);
+}
+
+/*
+ * Flag profiling as stopped, run nmi_cpu_stop() through on_each_cpu() and
+ * zero the sample statistics (other_id and ovf are left untouched).
+ */
+void nmi_stop(void)
+{
+    nmi_profiling_started = 0;
+    on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+
+    xen_count = dom_count = 0;
+    active_virq_count = passive_virq_count = other_virq_count = 0;
+}
+
+extern unsigned int read_ctr(struct op_msrs const * const msrs, int ctr);
+
+/*
+ * Catch up on performance-counter NMIs that may have been missed.
+ *
+ * We may have missed some NMI interrupts if we were already in an NMI
+ * context at that time.  If this happens, the counters are not reset and,
+ * on P4, the APIC LVT mask bit is set; in both cases samples are lost.
+ * On P4 this can be detected from the LVT mask, on P6 the counters have to
+ * be examined for overflow.  So, every timer interrupt, we check that
+ * everything is OK: the mask state is sampled, nmi_callback() is run by
+ * hand, a restart is accounted when an overflow was found while the entry
+ * was masked, and finally all VIRQs deferred in virq_ovf_pending are sent.
+ */
+void nmi_sanity_check(struct cpu_user_regs *regs, int cpu)
+{
+    unsigned int dom;
+    int const was_masked = (apic_read(APIC_LVTPC) & APIC_LVT_MASKED) ? 1 : 0;
+
+    nmi_callback(regs, cpu);
+
+    if (ovf && was_masked) {
+        struct domain *cur = current->domain;
+
+        if (is_active(cur))
+            cur->shared_info->nmi_restarts++;
+        else if (is_passive(cur))
+            primary_profiler->shared_info->nmi_restarts++;
+    }
+
+    for (dom = 0; dom < adomains; dom++) {
+        if (!test_and_clear_bit(dom, &virq_ovf_pending[0]))
+            continue;
+        /* For now we do not support profiling of SMP guests */
+        /* virq is delivered to first VCPU */
+        send_guest_virq(adomain_ptrs[dom]->vcpu[0], VIRQ_PMC_OVF);
+    }
+}
+
+/* Counterpart of nmi_enable_virq(): remove the installed NMI callback. */
+void nmi_disable_virq(void)
+{
+ unset_nmi_callback();
+}
+
+/*
+ * Write the values held in the `saved` fields back to the MSRs listed in
+ * msrs: all control registers first, then all counters.
+ */
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+    unsigned int const counter_cnt = model->num_counters;
+    unsigned int const control_cnt = model->num_controls;
+    unsigned int idx;
+
+    for (idx = 0; idx < control_cnt; ++idx) {
+        struct op_msr * const reg = &msrs->controls[idx];
+        wrmsr(reg->addr, reg->saved.low, reg->saved.high);
+    }
+
+    for (idx = 0; idx < counter_cnt; ++idx) {
+        struct op_msr * const reg = &msrs->counters[idx];
+        wrmsr(reg->addr, reg->saved.low, reg->saved.high);
+    }
+}
+
+/*
+ * Per-CPU callback: restore the saved MSR contents of the executing CPU.
+ * `dummy` is unused.
+ */
+static void nmi_cpu_shutdown(void * dummy)
+{
+    nmi_restore_registers(&cpu_msrs[smp_processor_id()]);
+}
+
+/*
+ * Undo nmi_reserve_counters(): restore the saved MSRs via on_each_cpu(),
+ * hand the lapic NMI back and free the per-CPU tables.
+ */
+void nmi_release_counters(void)
+{
+    on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+
+    release_lapic_nmi();
+
+    free_msrs();
+}
+
+/* Definition of the per-counter configuration table that op_counter.h
+ * declares extern; indexed by counter number. */
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+/*
+ * Select the Pentium 4 model operations.
+ *
+ * Returns 0 (and leaves `model` untouched) for CPU models above 4, otherwise
+ * sets `model` and returns 1.  On SMP builds the hyper-threaded spec is
+ * chosen when cpu_has_ht is set; the log line names the spec actually
+ * selected.
+ *
+ * Deliberately not __init: the caller, nmi_init(), is not __init either
+ * and uses current->domain / copy_to_user, so it appears to be reachable
+ * at run time, when init text must no longer be called.
+ */
+static int p4_init(void)
+{
+    __u8 cpu_model = current_cpu_data.x86_model;
+
+    printk("cpu model: %d\n", cpu_model);
+    if (cpu_model > 4)
+        return 0;
+
+#ifndef CONFIG_SMP
+    printk("model is op_p4_spec (uniprocessor)\n");
+    model = &op_p4_spec;
+#else
+    if (cpu_has_ht) {
+        printk("model is op_p4_ht2_spec (SMP)\n");
+        model = &op_p4_ht2_spec;
+    } else {
+        printk("model is op_p4_spec (SMP)\n");
+        model = &op_p4_spec;
+    }
+#endif
+    return 1;
+}
+
+
+/*
+ * Select the P6-family model operations.
+ *
+ * Returns 0 (and leaves `model` untouched) for CPU models above 0xd,
+ * otherwise sets `model` to op_ppro_spec and returns 1.
+ *
+ * Deliberately not __init: the caller, nmi_init(), is not __init either
+ * and appears to be reachable at run time.
+ */
+static int ppro_init(void)
+{
+    __u8 cpu_model = current_cpu_data.x86_model;
+
+    if (cpu_model > 0xd)
+        return 0;
+
+    model = &op_ppro_spec;
+    return 1;
+}
+
+/*
+ * Register the calling domain with the profiler and report the number of
+ * hardware counters.
+ *
+ * The first domain to call becomes the primary profiler; only the primary
+ * profiler triggers CPU model detection.
+ *
+ * @num_events: user pointer receiving model->num_counters (sizeof(int) bytes)
+ * @is_primary: user pointer receiving 1 if this call made the caller the
+ *              primary profiler, 0 otherwise
+ *
+ * Returns 0 on success, -ENODEV when there is no APIC, the CPU is not a
+ * supported Intel family 6 / 0xf part or no model has been selected yet,
+ * and -EFAULT when a result cannot be copied out.
+ */
+int nmi_init(int *num_events, int *is_primary)
+{
+    __u8 vendor = current_cpu_data.x86_vendor;
+    __u8 family = current_cpu_data.x86;
+    int prim = 0;
+    int detected = 0;
+
+    if (!cpu_has_apic) {
+        printk("(XEN) cpu has no APIC\n");
+        return -ENODEV;
+    }
+
+    if (primary_profiler == NULL) {
+        primary_profiler = current->domain;
+        prim = 1;
+    }
+
+    if (primary_profiler == current->domain) {
+        printk("cpu vendor: %d\n", vendor);
+        printk("cpu family: %d\n", family);
+
+        if (vendor == X86_VENDOR_INTEL) {
+            if (family == 0xf)          /* Pentium IV */
+                detected = p4_init();
+            else if (family == 6)       /* A P6-class processor */
+                detected = ppro_init();
+        }
+
+        if (!detected) {
+            /* Give the primary role back if this call claimed it, so a
+             * failed detection does not leave a primary profiler
+             * registered without a model. */
+            if (prim)
+                primary_profiler = NULL;
+            return -ENODEV;
+        }
+    }
+
+    /* A non-primary caller relies on the primary having selected a model;
+     * never dereference a missing one. */
+    if (model == NULL)
+        return -ENODEV;
+
+    if (copy_to_user((void *)num_events, (void *)&model->num_counters, sizeof(int)))
+        return -EFAULT;
+    if (copy_to_user((void *)is_primary, (void *)&prim, sizeof(int)))
+        return -EFAULT;
+
+    return 0;
+}
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h ./xen/arch/x86/oprofile/op_counter.h
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_counter.h 2005-08-18 20:28:44 -05:00
@@ -0,0 +1,33 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 8
+
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ *
+ * Field notes below describe how the P4 model code uses each field;
+ * NOTE(review): `count` and `enabled` are not used in the code reviewed
+ * here -- presumably the sampling period and an on/off flag; confirm.
+ */
+struct op_counter_config {
+ unsigned long count;
+ unsigned long enabled;
+ unsigned long event; /* 1-based event code; P4 uses p4_events[event - 1] */
+ unsigned long kernel; /* bit 0 selects the ESCR "OS" flag on P4 */
+ unsigned long user; /* bit 0 selects the ESCR "USR" flag on P4 */
+ unsigned long unit_mask; /* low 16 bits become the ESCR event mask on P4 */
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c ./xen/arch/x86/oprofile/op_model_p4.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_model_p4.c 2005-08-19 22:25:07 -05:00
@@ -0,0 +1,748 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+/* Number of entries in p4_events[]. */
+#define NUM_EVENTS 39
+
+/* Register counts for a CPU without hyper-threading. */
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+/* Register counts for hyper-threaded CPUs (setup_num_counters() switches
+ * num_counters to NUM_COUNTERS_HT2 when cpu_has_ht is set). */
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+/* Number of counters handled per CPU; starts at the non-HT value. */
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/*
+ * Pick the per-CPU counter count at run time: whether the chip is
+ * hyper-threaded is only known once the kernel has booted.  On SMP builds
+ * with cpu_has_ht set, num_counters becomes NUM_COUNTERS_HT2; otherwise it
+ * is left unchanged.
+ */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+    if (!cpu_has_ht)
+        return;
+    num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+/*
+ * Step between consecutive MSR addresses when walking a register range:
+ * 2 on SMP builds when cpu_has_ht is set, 1 in every other case.
+ */
+static int inline addr_increment(void)
+{
+#ifdef CONFIG_SMP
+    if (cpu_has_ht)
+        return 2;
+#endif
+    return 1;
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+
+/* One usable counter: its CTR_* bit and the MSRs that implement it. */
+struct p4_counter_binding {
+ int virt_counter; /* CTR_* bit identifying this counter */
+ int counter_address; /* MSR address of the counter itself */
+ int cccr_address; /* MSR address of the counter's CCCR */
+};
+
+/* One event: the selector values plus up to two counter/ESCR pairings. */
+struct p4_event_binding {
+ int escr_select; /* value to put in CCCR */
+ int event_select; /* value to put in ESCR */
+ struct {
+ int virt_counter; /* for this counter... */
+ int escr_address; /* use this ESCR */
+ } bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+ event/i386.p4*events. */
+
+
+/* One bit per usable counter; the bit position equals the counter's index
+ in p4_counters[] below. */
+#define CTR_BPU_0 (1 << 0)
+#define CTR_MS_0 (1 << 1)
+#define CTR_FLAME_0 (1 << 2)
+#define CTR_IQ_4 (1 << 3)
+#define CTR_BPU_2 (1 << 4)
+#define CTR_MS_2 (1 << 5)
+#define CTR_FLAME_2 (1 << 6)
+#define CTR_IQ_5 (1 << 7)
+
+/* The counters this module drives: { CTR_* bit, counter MSR, CCCR MSR }.
+ Indexed through VIRT_CTR(). */
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+ { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
+ { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
+ { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+ { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
+ { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
+ { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
+ { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+ { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
+};
+
+/* Number of CCCRs that exist but are not bound to a counter in
+ * p4_counters[].  Parenthesized so the macro stays a single operand
+ * wherever it is expanded. */
+#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
+
+/* All cccr we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+    MSR_P4_BPU_CCCR1, MSR_P4_BPU_CCCR3,
+    MSR_P4_MS_CCCR1, MSR_P4_MS_CCCR3,
+    MSR_P4_FLAME_CCCR1, MSR_P4_FLAME_CCCR3,
+    MSR_P4_IQ_CCCR0, MSR_P4_IQ_CCCR1,
+    MSR_P4_IQ_CCCR2, MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+/* counter_config[].event is 1-based: pmc_setup_one_p4_counter() looks up
+ p4_events[event - 1]. Each entry lists escr_select, event_select and up
+ to two { CTR_* bit, ESCR MSR } bindings; { 0, 0 } fills an unused
+ binding slot. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+
+ { /* BRANCH_RETIRED */
+ 0x05, 0x06,
+ { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* MISPRED_BRANCH_RETIRED */
+ 0x04, 0x03,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* TC_DELIVER_MODE */
+ 0x01, 0x01,
+ { { CTR_MS_0, MSR_P4_TC_ESCR0},
+ { CTR_MS_2, MSR_P4_TC_ESCR1} }
+ },
+
+ { /* BPU_FETCH_REQUEST */
+ 0x00, 0x03,
+ { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+ { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+ },
+
+ { /* ITLB_REFERENCE */
+ 0x03, 0x18,
+ { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+ { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+ },
+
+ { /* MEMORY_CANCEL */
+ 0x05, 0x02,
+ { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+ { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+ },
+
+ { /* MEMORY_COMPLETE */
+ 0x02, 0x08,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* LOAD_PORT_REPLAY */
+ 0x02, 0x04,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* STORE_PORT_REPLAY */
+ 0x02, 0x05,
+ { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+ { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+ },
+
+ { /* MOB_LOAD_REPLAY */
+ 0x02, 0x03,
+ { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+ { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+ },
+
+ { /* PAGE_WALK_TYPE */
+ 0x04, 0x01,
+ { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+ { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+ },
+
+ { /* BSQ_CACHE_REFERENCE */
+ 0x07, 0x0c,
+ { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+ { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+ },
+
+ { /* IOQ_ALLOCATION */
+ 0x06, 0x03,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { 0, 0 } }
+ },
+
+ { /* IOQ_ACTIVE_ENTRIES */
+ 0x06, 0x1a,
+ { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+ { 0, 0 } }
+ },
+
+ { /* FSB_DATA_ACTIVITY */
+ 0x06, 0x17,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+ },
+
+ { /* BSQ_ALLOCATION */
+ 0x07, 0x05,
+ { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+ { 0, 0 } }
+ },
+
+ { /* BSQ_ACTIVE_ENTRIES */
+ 0x07, 0x06,
+ { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
+ { 0, 0 } }
+ },
+
+ { /* X87_ASSIST */
+ 0x05, 0x03,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* SSE_INPUT_ASSIST */
+ 0x01, 0x34,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* PACKED_SP_UOP */
+ 0x01, 0x08,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* PACKED_DP_UOP */
+ 0x01, 0x0c,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* SCALAR_SP_UOP */
+ 0x01, 0x0a,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* SCALAR_DP_UOP */
+ 0x01, 0x0e,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* 64BIT_MMX_UOP */
+ 0x01, 0x02,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* 128BIT_MMX_UOP */
+ 0x01, 0x1a,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* X87_FP_UOP */
+ 0x01, 0x04,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* X87_SIMD_MOVES_UOP */
+ 0x01, 0x2e,
+ { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+ { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+ },
+
+ { /* MACHINE_CLEAR */
+ 0x05, 0x02,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* GLOBAL_POWER_EVENTS */
+ 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+ { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+ { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+ },
+
+ { /* TC_MS_XFER */
+ 0x00, 0x05,
+ { { CTR_MS_0, MSR_P4_MS_ESCR0},
+ { CTR_MS_2, MSR_P4_MS_ESCR1} }
+ },
+
+ { /* UOP_QUEUE_WRITES */
+ 0x00, 0x09,
+ { { CTR_MS_0, MSR_P4_MS_ESCR0},
+ { CTR_MS_2, MSR_P4_MS_ESCR1} }
+ },
+
+ { /* FRONT_END_EVENT */
+ 0x05, 0x08,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* EXECUTION_EVENT */
+ 0x05, 0x0c,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* REPLAY_EVENT */
+ 0x05, 0x09,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+ },
+
+ { /* INSTR_RETIRED */
+ 0x04, 0x02,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* UOPS_RETIRED */
+ 0x04, 0x01,
+ { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+ { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+ },
+
+ { /* UOP_TYPE */
+ 0x02, 0x02,
+ { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+ { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+ },
+
+ { /* RETIRED_MISPRED_BRANCH_TYPE */
+ 0x02, 0x05,
+ { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+ { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+ },
+
+ { /* RETIRED_BRANCH_TYPE */
+ 0x02, 0x04,
+ { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+ { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+ }
+};
+
+
+/* Non-zero when bit 7 of the (low word of the) MISC_ENABLE value is set. */
+#define MISC_PMC_ENABLED_P(x) ((x) & (1 << 7))
+
+/*
+ * ESCR helpers.  They operate on a local copy of the low 32 bits of the
+ * register: ESCR_CLEAR keeps only the bits in ESCR_RESERVED_BITS, the
+ * ESCR_SET_* macros OR one field in, and ESCR_READ/ESCR_WRITE transfer the
+ * value from/to the ESCR named by binding i of event ev.  The _0 / _1
+ * variants place the USR/OS flags at the two different bit positions the
+ * callers select by stagger.
+ */
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr((ev)->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr((ev)->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+/*
+ * CCCR helpers, same scheme as above.  CCCR_READ/CCCR_WRITE address the
+ * CCCR of p4_counters[i]; bit 12 is the enable bit and bit 31 the overflow
+ * flag tested/cleared by CCCR_OVF_P / CCCR_CLEAR_OVF.
+ */
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+/*
+ * Counter helpers.  CTR_WRITE loads the negated count (all-ones high
+ * word), so CTR_OVERFLOW_P treats a clear bit 31 in the low word as
+ * "has overflowed".
+ */
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/*
+ * Return the "stagger" of the current CPU: an extra array offset used
+ * throughout this module to select the "even" or "odd" half of all the
+ * divided resources.
+ *
+ * The two logical CPUs of one physical CPU are meant to use disjoint sets
+ * of counters, but the sibling-map based computation
+ * (cpu != first_cpu(cpu_sibling_map[cpu])) is considered wrong here and is
+ * disabled, so the result is 0 on every CPU, with or without CONFIG_SMP.
+ */
+static unsigned int get_stagger(void)
+{
+    unsigned int const stagger = 0;
+
+    return stagger;
+}
+
+
+/* finally, mediate access to a real hardware counter
+ by passing a "virtual" counter number to this macro,
+ along with your stagger setting. The result is an index into
+ p4_counters[]. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+/* NOTE(review): presumably the per-counter reload value; it is not
+ referenced in the part of the file reviewed here -- confirm. */
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+/*
+ * Fill msrs->counters[].addr and msrs->controls[].addr with the MSR
+ * addresses this CPU has to save and restore.
+ *
+ * Counters come from p4_counters[] via VIRT_CTR(). The control list is
+ * built from the CCCR range followed by several ESCR ranges; `i` is the
+ * running index into msrs->controls[] and carries over from one loop to
+ * the next. Every range starts at `stag` and advances by addr_increment().
+ *
+ * NOTE(review): get_stagger() currently always returns 0, so the final
+ * "odd thread" branch below is never taken.
+ */
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+ unsigned int i;
+ unsigned int addr, stag;
+
+ setup_num_counters();
+ stag = get_stagger();
+
+ /* the counter registers we pay attention to */
+ for (i = 0; i < num_counters; ++i) {
+ msrs->counters[i].addr =
+ p4_counters[VIRT_CTR(stag, i)].counter_address;
+ }
+
+ /* FIXME: bad feeling, we don't save the 10 counters we don't use. */
+
+ /* 18 CCCR registers */
+ for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+ addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+ msrs->controls[i].addr = addr;
+ }
+
+ /* 43 ESCR registers in three or four discontiguous groups */
+ for (addr = MSR_P4_BSU_ESCR0 + stag;
+ addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+ msrs->controls[i].addr = addr;
+ }
+
+ /* no IQ_ESCR0/1 on some models, we save BSU_ESCR0/1 a second time
+ * to avoid a special case in nmi_{save|restore}_registers() */
+ if (boot_cpu_data.x86_model >= 0x3) {
+ for (addr = MSR_P4_BSU_ESCR0 + stag;
+ addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+ msrs->controls[i].addr = addr;
+ }
+ } else {
+ for (addr = MSR_P4_IQ_ESCR0 + stag;
+ addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+ msrs->controls[i].addr = addr;
+ }
+ }
+
+ for (addr = MSR_P4_RAT_ESCR0 + stag;
+ addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+ msrs->controls[i].addr = addr;
+ }
+
+ for (addr = MSR_P4_MS_ESCR0 + stag;
+ addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
+ msrs->controls[i].addr = addr;
+ }
+
+ for (addr = MSR_P4_IX_ESCR0 + stag;
+ addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
+ msrs->controls[i].addr = addr;
+ }
+
+ /* there are 2 remaining non-contiguously located ESCRs */
+
+ if (num_counters == NUM_COUNTERS_NON_HT) {
+ /* standard non-HT CPUs handle both remaining ESCRs*/
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+ } else if (stag == 0) {
+ /* HT CPUs give the first remainder to the even thread, as
+ the 32nd control register */
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+ } else {
+ /* and two copies of the second to the odd thread,
+ for the 22nd and 23rd control registers */
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+ }
+}
+
+
+/*
+ * Program the ESCR and CCCR for counter `ctr` from counter_config[ctr].
+ *
+ * The configured event code is 1-based; codes outside 1..NUM_EVENTS are
+ * reported and ignored.  The first binding of the event whose counter mask
+ * contains this counter supplies the ESCR: it is rewritten with the
+ * USR/OS flags, event select and unit mask, and the counter's CCCR is then
+ * rewritten with the required bits, the ESCR select and the overflow
+ * interrupt bit for the current stagger.  If no binding matches, an error
+ * is logged and nothing is written.
+ */
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+    int i;
+    int const maxbind = 2;
+    unsigned int cccr = 0;
+    unsigned int escr = 0;
+    unsigned int high = 0;
+    unsigned int counter_bit;
+    struct p4_event_binding *ev = NULL;
+    unsigned int stag;
+
+    stag = get_stagger();
+
+    /* convert from counter *number* to counter *bit* */
+    counter_bit = 1 << VIRT_CTR(stag, ctr);
+
+    /* find our event binding structure; the field is unsigned, so the
+     * only value below the valid range is 0 */
+    if (counter_config[ctr].event == 0 || counter_config[ctr].event > NUM_EVENTS) {
+        printk(KERN_ERR
+               "oprofile: P4 event code 0x%lx out of range\n",
+               counter_config[ctr].event);
+        return;
+    }
+
+    ev = &(p4_events[counter_config[ctr].event - 1]);
+
+    for (i = 0; i < maxbind; i++) {
+        if (ev->bindings[i].virt_counter & counter_bit) {
+
+            /* modify ESCR */
+            ESCR_READ(escr, high, ev, i);
+            ESCR_CLEAR(escr);
+            if (stag == 0) {
+                ESCR_SET_USR_0(escr, counter_config[ctr].user);
+                ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+            } else {
+                ESCR_SET_USR_1(escr, counter_config[ctr].user);
+                ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+            }
+            ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+            ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+            ESCR_WRITE(escr, high, ev, i);
+
+            /* modify CCCR */
+            CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+            CCCR_CLEAR(cccr);
+            CCCR_SET_REQUIRED_BITS(cccr);
+            CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+            if (stag == 0) {
+                CCCR_SET_PMI_OVF_0(cccr);
+            } else {
+                CCCR_SET_PMI_OVF_1(cccr);
+            }
+            CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+            return;
+        }
+    }
+
+    /* stag and ctr are unsigned, hence %u */
+    printk(KERN_ERR
+           "oprofile: P4 event code 0x%lx no binding, stag %u ctr %u\n",
+           counter_config[ctr].event, stag, ctr);
+}
+
+
+/*
+ * Bring the P4 performance-monitoring MSRs of the calling CPU into a known
+ * state and program every enabled counter.
+ *
+ * Steps: verify that MISC_PMC_ENABLED_P() holds for MSR_IA32_MISC_ENABLE
+ * (otherwise log and return without touching anything), reset the CCCRs
+ * this CPU uses and the ones listed in p4_unused_cccr[], zero the ESCRs,
+ * then for each enabled entry of counter_config[] record its count in
+ * reset_value[], program its ESCR/CCCR and load the counter.  Disabled
+ * counters get reset_value[i] = 0.
+ *
+ * The msrs argument is not used by this function.
+ */
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+    unsigned int i;
+    unsigned int low, high;
+    unsigned int addr;
+    unsigned int stag;
+
+    stag = get_stagger();
+
+    rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+    if (! MISC_PMC_ENABLED_P(low)) {
+        printk(KERN_ERR "oprofile: P4 PMC not available\n");
+        return;
+    }
+
+    /* clear the cccrs we will use */
+    for (i = 0 ; i < num_counters ; i++) {
+        rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+        CCCR_CLEAR(low);
+        CCCR_SET_REQUIRED_BITS(low);
+        wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+    }
+
+    /* clear cccrs outside our concern */
+    for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+        rdmsr(p4_unused_cccr[i], low, high);
+        CCCR_CLEAR(low);
+        CCCR_SET_REQUIRED_BITS(low);
+        wrmsr(p4_unused_cccr[i], low, high);
+    }
+
+    /* clear all escrs (including those outside our concern) */
+    for (addr = MSR_P4_BSU_ESCR0 + stag;
+         addr < MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+        wrmsr(addr, 0, 0);
+    }
+
+    /* On older models clear also MSR_P4_IQ_ESCR0/1 */
+    if (boot_cpu_data.x86_model < 0x3) {
+        wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+        wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+    }
+
+    /* only addr advances here: nothing in this loop is indexed by i */
+    for (addr = MSR_P4_RAT_ESCR0 + stag;
+         addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
+        wrmsr(addr, 0, 0);
+    }
+
+    for (addr = MSR_P4_MS_ESCR0 + stag;
+         addr <= MSR_P4_TC_ESCR1; addr += addr_increment()) {
+        wrmsr(addr, 0, 0);
+    }
+
+    for (addr = MSR_P4_IX_ESCR0 + stag;
+         addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()) {
+        wrmsr(addr, 0, 0);
+    }
+
+    /* the two non-contiguous ESCRs, split the same way as in
+       p4_fill_in_addresses() */
+    if (num_counters == NUM_COUNTERS_NON_HT) {
+        wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+        wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+    } else if (stag == 0) {
+        wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+    } else {
+        wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+    }
+
+    /* setup all counters */
+    for (i = 0 ; i < num_counters ; ++i) {
+        if (counter_config[i].enabled) {
+            reset_value[i] = counter_config[i].count;
+            pmc_setup_one_p4_counter(i);
+            CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+        } else {
+            reset_value[i] = 0;
+        }
+    }
+}
+
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+/*
+ * Scan the counters owned by the calling CPU for overflow, log one sample
+ * per overflowed counter and re-arm it.
+ *
+ * Only counters with a non-zero reset_value[] are examined.  The sample
+ * carries regs->eip and a mode of 1 when KERNEL_MODE(v, regs) holds, 2 when
+ * RING_0(regs) holds, and 0 otherwise.  The cpu and msrs arguments are not
+ * used.
+ *
+ * Returns 1 if at least one counter had overflowed, 0 otherwise.
+ */
+static int p4_check_ctrs(unsigned int const cpu,
+                         struct op_msrs const * const msrs,
+                         struct cpu_user_regs * const regs)
+{
+    unsigned long ctr, low, high, stag, real;
+    struct vcpu *v = current;
+    u64 eip = regs->eip;
+    int handled = 0;
+    int mode;
+    int i;
+
+    if (KERNEL_MODE(v, regs))
+        mode = 1;
+    else
+        mode = RING_0(regs) ? 2 : 0;
+
+    stag = get_stagger();
+
+    for (i = 0; i < num_counters; ++i) {
+        if (reset_value[i]) {
+            /*
+             * The hardware needs two extra corrections:
+             *
+             * - an overflow may show up either as the CCCR:OVF flag or
+             *   only as a cleared counter high bit (un-flagged
+             *   overflow), so both are tested;
+             *
+             * - the counter is written back twice so the update really
+             *   takes effect.
+             *
+             * The first seems related to extra NMIs arriving during the
+             * current NMI; the second is errata N15 in intel doc
+             * 249199-029 (pentium 4 specification update), whose
+             * suggested work-around does not appear to solve the
+             * problem.  Do not reorder the writes below.
+             */
+            real = VIRT_CTR(stag, i);
+
+            CCCR_READ(low, high, real);
+            CTR_READ(ctr, high, real);
+            if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+                pmc_log_event(current->domain, eip, mode, i);
+                CTR_WRITE(reset_value[i], real);
+                CCCR_CLEAR_OVF(low);
+                CCCR_WRITE(low, high, real);
+                CTR_WRITE(reset_value[i], real);
+                handled = 1;
+            }
+        }
+    }
+
+    /* P4 quirk: you have to re-unmask the apic vector */
+    apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+    /* See op_model_ppro.c */
+    return handled;
+}
+
+
+/*
+ * Set the enable bit in the CCCR of every counter on the calling CPU that
+ * has a non-zero reset_value[]; other counters are left alone.  The msrs
+ * argument is not used.
+ */
+static void p4_start(struct op_msrs const * const msrs)
+{
+    unsigned int stag = get_stagger();
+    unsigned int low, high;
+    int i;
+
+    for (i = 0; i < num_counters; ++i) {
+        if (reset_value[i]) {
+            CCCR_READ(low, high, VIRT_CTR(stag, i));
+            CCCR_SET_ENABLE(low);
+            CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+        }
+    }
+}
+
+
+/*
+ * Clear the enable bit in the CCCR of every counter on the calling CPU,
+ * whether or not it was configured.  The msrs argument is not used.
+ */
+static void p4_stop(struct op_msrs const * const msrs)
+{
+    unsigned int stag = get_stagger();
+    unsigned int low, high;
+    int i = 0;
+
+    while (i < num_counters) {
+        CCCR_READ(low, high, VIRT_CTR(stag, i));
+        CCCR_SET_DISABLE(low);
+        CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+        ++i;
+    }
+}
+
+
+#ifdef CONFIG_SMP
+/* P4 model table sized by the *_HT2 constants; only built with CONFIG_SMP.
+ * It shares all of its callbacks with op_p4_spec. */
+struct op_x86_model_spec const op_p4_ht2_spec = {
+    .num_counters = NUM_COUNTERS_HT2,
+    .num_controls = NUM_CONTROLS_HT2,
+    .fill_in_addresses = p4_fill_in_addresses,
+    .setup_ctrs = p4_setup_ctrs,
+    .check_ctrs = p4_check_ctrs,
+    .start = p4_start,
+    .stop = p4_stop,
+};
+#endif
+
+/* P4 model table sized by the *_NON_HT constants. */
+struct op_x86_model_spec const op_p4_spec = {
+    .num_counters = NUM_COUNTERS_NON_HT,
+    .num_controls = NUM_CONTROLS_NON_HT,
+    .fill_in_addresses = p4_fill_in_addresses,
+    .setup_ctrs = p4_setup_ctrs,
+    .check_ctrs = p4_check_ctrs,
+    .start = p4_start,
+    .stop = p4_stop,
+};
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c ./xen/arch/x86/oprofile/op_model_ppro.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_model_ppro.c 2005-08-19 20:36:40 -05:00
@@ -0,0 +1,168 @@
+/**
+ * @file op_model_ppro.c
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+/*
+ * Counter access.  CTR_WRITE stores the negated count in the low word and
+ * all-ones in the high word, so CTR_OVERFLOWED() reports an overflow once
+ * bit 31 of the low word is clear again.
+ *
+ * Every macro parameter is parenthesized so that an argument containing an
+ * operator cannot re-associate inside the expansion.
+ */
+#define CTR_READ(l,h,msrs,c) do {rdmsr((msrs)->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr((msrs)->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+/*
+ * Control (event select) access.  CTRL_CLEAR keeps only bit 21 of the
+ * current value; the CTRL_SET_* helpers OR fields into it.
+ * NOTE(review): bit meanings (22 enable, 20 interrupt, 17 OS, 16 USR,
+ * 15:8 unit mask, 7:0 event) follow the P6 PerfEvtSel layout -- confirm
+ * against the processor manual.
+ */
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(((msrs)->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(((msrs)->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) ((n) |= (1<<22))
+#define CTRL_SET_INACTIVE(n) ((n) &= ~(1<<22))
+#define CTRL_CLEAR(x) ((x) &= (1<<21))
+#define CTRL_SET_ENABLE(val) ((val) |= 1<<20)
+#define CTRL_SET_USR(val,u) ((val) |= (((u) & 1) << 16))
+#define CTRL_SET_KERN(val,k) ((val) |= (((k) & 1) << 17))
+#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
+#define CTRL_SET_EVENT(val, e) ((val) |= (e))
+
+/* count reloaded into each counter after an overflow; set by ppro_setup_ctrs() */
+static unsigned long reset_value[NUM_COUNTERS];
+
+/*
+ * Record the MSR numbers of the two P6 counters and their two event-select
+ * registers in msrs->counters[] and msrs->controls[].
+ */
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+    struct op_msr *ctr = msrs->counters;
+    struct op_msr *ctl = msrs->controls;
+
+    ctr[0].addr = MSR_P6_PERFCTR0;
+    ctr[1].addr = MSR_P6_PERFCTR1;
+
+    ctl[0].addr = MSR_P6_EVNTSEL0;
+    ctl[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+/*
+ * Reset the P6 event-select registers and counters, then program every
+ * counter that counter_config[] marks as enabled.
+ *
+ * For an enabled counter the configured count is remembered in
+ * reset_value[], loaded into the counter, and the event-select register is
+ * rebuilt from the user/kernel/unit_mask/event fields.  Entries of
+ * reset_value[] belonging to disabled counters are not modified.
+ */
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+    unsigned int low, high;
+    int i;
+
+    /* wipe every event-select (control) register */
+    for (i = 0; i < NUM_CONTROLS; ++i) {
+        CTRL_READ(low, high, msrs, i);
+        CTRL_CLEAR(low);
+        CTRL_WRITE(low, high, msrs, i);
+    }
+
+    /* avoid a false detection of ctr overflows in NMI handler */
+    for (i = 0; i < NUM_COUNTERS; ++i) {
+        CTR_WRITE(1, msrs, i);
+    }
+
+    /* enable active counters */
+    for (i = 0; i < NUM_COUNTERS; ++i) {
+        if (!counter_config[i].enabled)
+            continue;
+
+        reset_value[i] = counter_config[i].count;
+
+        CTR_WRITE(counter_config[i].count, msrs, i);
+
+        CTRL_READ(low, high, msrs, i);
+        CTRL_CLEAR(low);
+        CTRL_SET_ENABLE(low);
+        CTRL_SET_USR(low, counter_config[i].user);
+        CTRL_SET_KERN(low, counter_config[i].kernel);
+        CTRL_SET_UM(low, counter_config[i].unit_mask);
+        CTRL_SET_EVENT(low, counter_config[i].event);
+        CTRL_WRITE(low, high, msrs, i);
+    }
+}
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+/*
+ * Scan both P6 counters for overflow, log one sample per overflowed
+ * counter and reload it from reset_value[].
+ *
+ * The sample carries regs->eip and a mode of 1 when KERNEL_MODE(v, regs)
+ * holds, 2 when RING_0(regs) holds, and 0 otherwise.  KERNEL_MODE is used
+ * rather than a bare RING_1 test so that guest-kernel detection matches
+ * p4_check_ctrs() and does not depend on the guest kernel running in
+ * ring 1.  The cpu argument is not used.
+ *
+ * Returns 1 if at least one counter had overflowed, 0 otherwise.
+ */
+static int ppro_check_ctrs(unsigned int const cpu,
+                           struct op_msrs const * const msrs,
+                           struct cpu_user_regs * const regs)
+{
+    unsigned int low, high;
+    int i, ovf = 0;
+    u64 eip = regs->eip;
+    int mode = 0;
+    struct vcpu *v = current;
+
+    if (KERNEL_MODE(v, regs))
+        mode = 1;
+    else if (RING_0(regs))
+        mode = 2;
+
+    for (i = 0 ; i < NUM_COUNTERS; ++i) {
+        CTR_READ(low, high, msrs, i);
+        if (CTR_OVERFLOWED(low)) {
+            pmc_log_event(current->domain, eip, mode, i);
+            CTR_WRITE(reset_value[i], msrs, i);
+            ovf = 1;
+        }
+    }
+
+    /* Only P6 based Pentium M need to re-unmask the apic vector but it
+     * doesn't hurt other P6 variant */
+    apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+    /* We can't always work out if we really handled an interrupt: we
+     * might have caught a *second* counter just after it overflowed, so
+     * when the interrupt for that counter arrives no counter looks
+     * overflowed any more.  Unlike the code this was derived from, the
+     * real result is reported instead of an unconditional 1, so such a
+     * late NMI is returned as "not handled".
+     * NOTE(review): confirm the caller tolerates that case.
+     */
+    return ovf;
+}
+
+
+/*
+ * Start counting by setting the "active" bit (bit 22) in control register
+ * 0.  Control register 1 is not touched here.
+ */
+static void ppro_start(struct op_msrs const * const msrs)
+{
+    unsigned int ctl_lo;
+    unsigned int ctl_hi;
+
+    CTRL_READ(ctl_lo, ctl_hi, msrs, 0);
+    CTRL_SET_ACTIVE(ctl_lo);
+    CTRL_WRITE(ctl_lo, ctl_hi, msrs, 0);
+}
+
+/*
+ * Stop counting by clearing the "active" bit (bit 22) in control register
+ * 0.  Control register 1 is not touched here.
+ */
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+    unsigned int ctl_lo;
+    unsigned int ctl_hi;
+
+    CTRL_READ(ctl_lo, ctl_hi, msrs, 0);
+    CTRL_SET_INACTIVE(ctl_lo);
+    CTRL_WRITE(ctl_lo, ctl_hi, msrs, 0);
+}
+
+/*
+ * Read counter i through its MSR and return the low 32 bits; the high
+ * word is read but discarded.
+ */
+unsigned int read_ctr(struct op_msrs const * const msrs, int i)
+{
+    unsigned int value_lo;
+    unsigned int value_hi;
+
+    CTR_READ(value_lo, value_hi, msrs, i);
+    return value_lo;
+}
+
+/* Model table for P6-family CPUs: two counters, two event-select registers. */
+struct op_x86_model_spec const op_ppro_spec = {
+    .num_counters = NUM_COUNTERS,
+    .num_controls = NUM_CONTROLS,
+    .fill_in_addresses = ppro_fill_in_addresses,
+    .setup_ctrs = ppro_setup_ctrs,
+    .check_ctrs = ppro_check_ctrs,
+    .start = ppro_start,
+    .stop = ppro_stop,
+};
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h ./xen/arch/x86/oprofile/op_x86_model.h
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_x86_model.h 2005-08-18 20:28:44 -05:00
@@ -0,0 +1,55 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+/* One 64-bit MSR value held as two 32-bit halves. */
+struct op_saved_msr {
+ unsigned int high;
+ unsigned int low;
+};
+
+/* An MSR number together with a slot for a saved copy of its contents. */
+struct op_msr {
+ unsigned long addr;
+ struct op_saved_msr saved;
+};
+
+/* The counter and control MSR tables of one model; the model's
+ * fill_in_addresses() hook stores the MSR numbers into them.
+ * NOTE(review): allocation and sizing of the two arrays is done by code
+ * outside this header -- presumably num_counters / num_controls entries. */
+struct op_msrs {
+ struct op_msr * counters;
+ struct op_msr * controls;
+};
+
+/* Forward declaration of the register frame type used by check_ctrs()
+ * below, so the prototype does not declare the struct inside its own
+ * parameter list when this header is included first. */
+struct cpu_user_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU model's perfctr support.
+ *
+ * num_counters / num_controls: number of counter and control MSRs.
+ * fill_in_addresses: store the model's MSR numbers into msrs.
+ * setup_ctrs:        program the counters from the current configuration.
+ * check_ctrs:        look for overflowed counters given the interrupted
+ *                    register frame; returns non-zero if one was found.
+ * start / stop:      enable / disable counting.
+ */
+struct op_x86_model_spec {
+    unsigned int const num_counters;
+    unsigned int const num_controls;
+    void (*fill_in_addresses)(struct op_msrs * const msrs);
+    void (*setup_ctrs)(struct op_msrs const * const msrs);
+    int (*check_ctrs)(unsigned int const cpu,
+                      struct op_msrs const * const msrs,
+                      struct cpu_user_regs * const regs);
+    void (*start)(struct op_msrs const * const msrs);
+    void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c ./xen/arch/x86/oprofile/pmc.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/pmc.c 2005-08-19 20:34:32 -05:00
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon, email: xenoprof@groups.hp.com
+ */
+
+#include <xen/sched.h>
+#include <asm/current.h>
+
+#include "op_counter.h"
+
+/* Domain ids of the active and passive profiled domains; only the first
+ * adomains / pdomains entries are meaningful. */
+int active_domains[MAX_OPROF_DOMAINS];
+int passive_domains[MAX_OPROF_DOMAINS];
+unsigned int adomains = 0;
+unsigned int pdomains = 0;
+/* Incremented by PMC_ENABLE_VIRQ and decremented by PMC_DISABLE_VIRQ;
+ * PMC_START is refused while it is below adomains. */
+unsigned int activated = 0;
+
+/* Number of unsigned int words in virq_ovf_pending[].
+ * NOTE(review): presumably one bit per active domain, 32 bits per word --
+ * confirm against the code that sets the bits. */
+#define VIRQ_BITMASK_SIZE (MAX_OPROF_DOMAINS/32 + 1)
+
+/* Domain allowed to issue the privileged PMC operations (see PRIV_OP). */
+struct domain * primary_profiler = NULL;
+/* Referenced domain pointers, parallel to active_domains[]. */
+struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+unsigned int virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+/* Return 1 if d's domain id is listed in active_domains[], else 0. */
+int is_active(struct domain *d)
+{
+    int idx = 0;
+
+    while (idx < adomains) {
+        if (active_domains[idx] == d->domain_id)
+            return 1;
+        idx++;
+    }
+    return 0;
+}
+
+/* Return the index of d in adomain_ptrs[], or -1 if it is not there. */
+int active_id(struct domain *d)
+{
+    int idx = 0;
+
+    while (idx < adomains) {
+        if (adomain_ptrs[idx] == d)
+            return idx;
+        idx++;
+    }
+    return -1;
+}
+
+/*
+ * Forget all active domains: reset adomains to 0, clear virq_ovf_pending[]
+ * and drop the reference held on every entry of adomain_ptrs[].
+ *
+ * Declared with (void) so that the definition is a real prototype.
+ */
+void free_adomain_ptrs(void)
+{
+    int i;
+    int num = adomains;
+
+    /* publish the empty list before the references are dropped */
+    adomains = 0;
+    for (i = 0; i < VIRQ_BITMASK_SIZE; i++)
+        virq_ovf_pending[i] = 0;
+
+    for (i = 0; i < num; i++) {
+        put_domain(adomain_ptrs[i]);
+        adomain_ptrs[i] = NULL;
+    }
+}
+
+/*
+ * Look up the first num ids of active_domains[] and store the resulting
+ * domain pointers in adomain_ptrs[], incrementing adomains for each one.
+ *
+ * num originates from a hypercall argument, so it is range-checked before
+ * it is used to index the fixed-size arrays.
+ *
+ * Returns 0 on success, -EINVAL if num is negative or larger than
+ * MAX_OPROF_DOMAINS, and -EFAULT if an id cannot be resolved; in the last
+ * case every pointer stored so far is released via free_adomain_ptrs().
+ */
+int set_adomain_ptrs(int num)
+{
+    int i;
+    struct domain *d;
+
+    if (num < 0 || num > MAX_OPROF_DOMAINS)
+        return -EINVAL;
+
+    for (i = 0; i < VIRQ_BITMASK_SIZE; i++)
+        virq_ovf_pending[i] = 0;
+
+    for (i = 0; i < num; i++) {
+        d = find_domain_by_id(active_domains[i]);
+        if (!d) {
+            free_adomain_ptrs();
+            return -EFAULT;
+        }
+        adomain_ptrs[i] = d;
+        adomains++;
+    }
+    return 0;
+}
+
+/*
+ * Append d to the active-domain list unless it is already listed.
+ *
+ * When the list is full the last entry is dropped to make room.  If no
+ * reference on d can be obtained the whole list is released.
+ *
+ * Returns 0 on success (or if d was already active), -EFAULT otherwise.
+ */
+int set_active(struct domain *d)
+{
+    if (is_active(d))
+        return 0;
+
+    /* hack if we run out of space */
+    if (adomains >= MAX_OPROF_DOMAINS) {
+        adomains--;
+        put_domain(adomain_ptrs[adomains]);
+    }
+
+    active_domains[adomains] = d->domain_id;
+    if (!get_domain(d)) {
+        free_adomain_ptrs();
+        return -EFAULT;
+    }
+
+    adomain_ptrs[adomains++] = d;
+    return 0;
+}
+
+/* Return 1 if d's domain id is listed in passive_domains[], else 0. */
+int is_passive(struct domain *d)
+{
+    int idx = 0;
+
+    while (idx < pdomains) {
+        if (passive_domains[idx] == d->domain_id)
+            return 1;
+        idx++;
+    }
+    return 0;
+}
+
+/* Return 1 if d is an active or a passive profiled domain, else 0. */
+int is_profiled(struct domain *d)
+{
+    return (is_active(d) || is_passive(d)) ? 1 : 0;
+}
+
+/* Store one sample in slot 'head' of s->event_log[] and return the index
+ * of the following slot, wrapping to 0 at MAX_OPROF_EVENTS. */
+static int pmc_store_sample(shared_info_t *s, int head,
+                            u64 eip, int mode, int event)
+{
+    s->event_log[head].eip = eip;
+    s->event_log[head].mode = mode;
+    s->event_log[head].event = event;
+    return (head + 1 >= MAX_OPROF_EVENTS) ? 0 : head + 1;
+}
+
+/*
+ * Append a sample (eip, mode, event) taken while domain d was running to
+ * an event ring in a shared_info page.
+ *
+ * - d not profiled: nothing happens.
+ * - d active (not passive): the sample goes into d's own ring.
+ * - d passive: the sample goes into primary_profiler's ring, preceded by
+ *   a marker record (eip = ~1, mode = ~0, event = d's domain id), so two
+ *   free slots are required.
+ *
+ * When the ring has no room, losing_samples is set and samples_lost is
+ * incremented instead.
+ *
+ * event_head lives in a page shared with the guest, so it is clamped into
+ * [0, MAX_OPROF_EVENTS) before it is used as an index in either case.
+ */
+void pmc_log_event(struct domain *d, u64 eip, int mode, int event)
+{
+    struct domain *dest;
+    shared_info_t *s;
+    int passive;
+    int head;
+    int tail;
+
+    if (!is_profiled(d))
+        return;
+
+    passive = is_passive(d);
+    dest = passive ? primary_profiler : d;
+    /* primary_profiler is reset to NULL by PMC_SHUTDOWN */
+    if (dest == NULL)
+        return;
+
+    s = dest->shared_info;
+    head = s->event_head;
+    tail = s->event_tail;
+    if (head >= MAX_OPROF_EVENTS)
+        head = 0;
+
+    if (!passive) {
+        if ((head == tail - 1) ||
+            (head == MAX_OPROF_EVENTS - 1 && tail == 0)) {
+            s->losing_samples = 1;
+            s->samples_lost++;
+            return;
+        }
+        s->event_head = pmc_store_sample(s, head, eip, mode, event);
+        return;
+    }
+
+    /* passive domains */
+
+    /* We use the following inefficient format for logging
+       events from other domains. We put a special record
+       indicating that the next record is for another domain.
+       This is done for each sample from another domain */
+
+    /* for passive domains we need to have at least two
+       entries empty in the buffer */
+    if ((head == tail - 1) ||
+        (head == tail - 2) ||
+        (head == MAX_OPROF_EVENTS - 1 && tail <= 1) ||
+        (head == MAX_OPROF_EVENTS - 2 && tail == 0)) {
+        s->losing_samples = 1;
+        s->samples_lost++;
+        return;
+    }
+
+    head = pmc_store_sample(s, head, ~1, ~0, d->domain_id);
+    s->event_head = pmc_store_sample(s, head, eip, mode, event);
+}
+
+/* Reset the oprofile bookkeeping in d's shared_info page: empty event
+ * ring and all loss/statistics counters back to zero. */
+static void pmc_event_init(struct domain *d)
+{
+    shared_info_t *shared = d->shared_info;
+
+    /* empty ring */
+    shared->event_head = 0;
+    shared->event_tail = 0;
+
+    /* loss tracking */
+    shared->losing_samples = 0;
+    shared->samples_lost = 0;
+
+    /* statistics */
+    shared->nmi_restarts = 0;
+    shared->active_samples = 0;
+    shared->passive_samples = 0;
+    shared->other_samples = 0;
+}
+
+extern int nmi_init(int *num_events, int *is_primary);
+extern int nmi_reserve_counters(void);
+extern int nmi_setup_events(void);
+extern int nmi_enable_virq(void);
+extern int nmi_start(void);
+extern void nmi_stop(void);
+extern void nmi_disable_virq(void);
+extern void nmi_release_counters(void);
+
+/* True for the operations that do_pmc_op() only accepts from
+ * primary_profiler.  The argument is parenthesized so that an expression
+ * can be passed safely. */
+#define PRIV_OP(op) (((op) == PMC_SET_ACTIVE) || ((op) == PMC_SET_PASSIVE) || ((op) == PMC_RESERVE_COUNTERS) \
+    || ((op) == PMC_SETUP_EVENTS) || ((op) == PMC_START) || ((op) == PMC_STOP) \
+    || ((op) == PMC_RELEASE_COUNTERS) || ((op) == PMC_SHUTDOWN))
+
+/*
+ * Hypercall entry point for the PMC_* profiling operations.
+ *
+ * op selects the operation; arg1/arg2 are operation specific (for the
+ * SET_ACTIVE, SET_PASSIVE and SETUP_EVENTS operations arg1 is a guest
+ * pointer to an array and arg2 its element count).
+ *
+ * Operations matched by PRIV_OP() are refused with -EPERM unless the
+ * caller is primary_profiler.  Element counts come from the guest and are
+ * checked against the size of the destination array before anything is
+ * copied.
+ *
+ * Returns 0 or the value of the underlying nmi_* call on success, and
+ * -EPERM, -EFAULT or -EINVAL on failure.
+ */
+int do_pmc_op(int op, u64 arg1, u64 arg2)
+{
+    int ret = 0;
+
+    if (PRIV_OP(op) && current->domain != primary_profiler)
+        return -EPERM;
+
+    switch (op) {
+    case PMC_INIT:
+        printk("PMC_INIT]\n");
+        ret = nmi_init((int *)arg1, (int *)arg2);
+        printk("nmi_init returned %d\n", ret);
+        break;
+
+    case PMC_SET_ACTIVE:
+        printk("PMC_SETACTIVE]\n");
+        if (adomains != 0)
+            return -EPERM;
+        /* active_domains[] holds MAX_OPROF_DOMAINS entries */
+        if (arg2 > MAX_OPROF_DOMAINS)
+            return -EINVAL;
+        if (copy_from_user((void *)&active_domains,
+                           (void *)arg1, arg2*sizeof(int)))
+            return -EFAULT;
+        if (set_adomain_ptrs(arg2))
+            return -EFAULT;
+        if (set_active(current->domain))
+            return -EFAULT;
+        break;
+
+    case PMC_SET_PASSIVE:
+        printk("PMC_SETPASSIVE\n");
+        if (pdomains != 0)
+            return -EPERM;
+        /* passive_domains[] holds MAX_OPROF_DOMAINS entries */
+        if (arg2 > MAX_OPROF_DOMAINS)
+            return -EINVAL;
+        if (copy_from_user((void *)&passive_domains,
+                           (void *)arg1, arg2*sizeof(int)))
+            return -EFAULT;
+        pdomains = arg2;
+        break;
+
+    case PMC_RESERVE_COUNTERS:
+        printk("PMC_RESERVE_COUNTERS\n");
+        ret = nmi_reserve_counters();
+        break;
+
+    case PMC_SETUP_EVENTS:
+        printk("PMV_SETUP_EVENTS\n");
+        /* NOTE(review): arg2 is not bounded against the number of
+         * entries in counter_config[]; the array's size is declared in
+         * op_counter.h and must be checked here before this is merged. */
+        if (copy_from_user((void *)&counter_config,
+                           (void *)arg1, arg2*sizeof(struct op_counter_config)))
+            return -EFAULT;
+        ret = nmi_setup_events();
+        break;
+
+    case PMC_ENABLE_VIRQ:
+        printk("PMC_ENABLE_VIRQ\n");
+        if (!is_active(current->domain)) {
+            if (current->domain != primary_profiler)
+                return -EPERM;
+            else
+                set_active(current->domain);
+        }
+        ret = nmi_enable_virq();
+        pmc_event_init(current->domain);
+        activated++;
+        break;
+
+    case PMC_START:
+        printk("PMC_START\n");
+        if (activated < adomains)
+            return -EPERM;
+        ret = nmi_start();
+        break;
+
+    case PMC_STOP:
+        printk("PMC_STOP\n");
+        nmi_stop();
+        break;
+
+    case PMC_DISABLE_VIRQ:
+        printk("PMC_DISBALE_VIRQ\n");
+        if (!is_active(current->domain))
+            return -EPERM;
+        nmi_disable_virq();
+        /* activated is unsigned: never let an unmatched disable wrap it,
+         * which would defeat the check in PMC_START */
+        if (activated != 0)
+            activated--;
+        break;
+
+    case PMC_RELEASE_COUNTERS:
+        printk("PMC_RELEASE_COUNTERS\n");
+        nmi_release_counters();
+        break;
+
+    case PMC_SHUTDOWN:
+        printk("PMC_SHUTDOWN\n");
+        free_adomain_ptrs();
+        pdomains = 0;
+        activated = 0;
+        primary_profiler = NULL;
+        break;
+
+    default:
+        ret = -EINVAL;
+        break;
+    }
+    return ret;
+}
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/traps.c ./xen/arch/x86/traps.c
--- ../xen-unstable.hg-6251/xen/arch/x86/traps.c 2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/traps.c 2005-08-18 20:28:44 -05:00
@@ -2,6 +2,10 @@
* arch/x86/traps.c
*
* Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -54,6 +58,7 @@
#include <asm/debugger.h>
#include <asm/msr.h>
#include <asm/x86_emulate.h>
+#include <asm/nmi.h>
/*
* opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
@@ -1040,7 +1045,7 @@ static void unknown_nmi_error(unsigned c
printk("Do you have a strange power saving mode enabled?\n");
}
-asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason)
+static void default_do_nmi(struct cpu_user_regs * regs, unsigned long reason)
{
++nmi_count(smp_processor_id());
@@ -1055,6 +1060,35 @@ asmlinkage void do_nmi(struct cpu_user_r
unknown_nmi_error((unsigned char)(reason&0xff));
}
+/* Default NMI callback: claims nothing, so do_nmi() always falls through
+ * to default_do_nmi() while no other callback is installed. */
+static int dummy_nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+ return 0;
+}
+
+/* Currently installed NMI callback; never NULL. */
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+/*
+ * NMI entry point: offer the NMI to the installed callback first and run
+ * the default handling only if the callback returns 0.
+ */
+asmlinkage void do_nmi(struct cpu_user_regs * regs, unsigned long reason)
+{
+    int cpu = smp_processor_id();
+
+    if (nmi_callback(regs, cpu))
+        return;
+
+    default_do_nmi(regs, reason);
+}
+
+/* Install callback as the NMI callback, replacing whatever was installed
+ * before.  The pointer is stored with a plain assignment. */
+void set_nmi_callback(nmi_callback_t callback)
+{
+ nmi_callback = callback;
+}
+
+/* Restore the default callback, which never claims an NMI. */
+void unset_nmi_callback(void)
+{
+ nmi_callback = dummy_nmi_callback;
+}
+
+EXPORT_SYMBOL(set_nmi_callback);
+EXPORT_SYMBOL(unset_nmi_callback);
+
asmlinkage int math_state_restore(struct cpu_user_regs *regs)
{
/* Prevent recursion. */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S ./xen/arch/x86/x86_32/entry.S
--- ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S 2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/x86_32/entry.S 2005-08-18 20:28:44 -05:00
@@ -763,7 +763,8 @@ ENTRY(hypercall_table)
.long do_boot_vcpu
.long do_ni_hypercall /* 25 */
.long do_mmuext_op
- .long do_acm_op /* 27 */
+ .long do_acm_op
+ .long do_pmc_op /* 28 */
.rept NR_hypercalls-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S ./xen/arch/x86/x86_64/entry.S
--- ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S 2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/x86_64/entry.S 2005-08-18 20:37:21 -05:00
@@ -593,6 +593,7 @@ ENTRY(hypercall_table)
.quad do_set_segment_base /* 25 */
.quad do_mmuext_op
.quad do_acm_op
+ .quad do_pmc_op
.rept NR_hypercalls-((.-hypercall_table)/4)
.quad do_ni_hypercall
.endr
diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h ./xen/include/asm-x86/msr.h
--- ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h 2005-08-19 23:46:23 -05:00
+++ ./xen/include/asm-x86/msr.h 2005-08-18 20:28:44 -05:00
@@ -195,6 +195,89 @@
#define MSR_P6_EVNTSEL0 0x186
#define MSR_P6_EVNTSEL1 0x187
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0 0x300
+#define MSR_P4_BPU_PERFCTR1 0x301
+#define MSR_P4_BPU_PERFCTR2 0x302
+#define MSR_P4_BPU_PERFCTR3 0x303
+#define MSR_P4_MS_PERFCTR0 0x304
+#define MSR_P4_MS_PERFCTR1 0x305
+#define MSR_P4_MS_PERFCTR2 0x306
+#define MSR_P4_MS_PERFCTR3 0x307
+#define MSR_P4_FLAME_PERFCTR0 0x308
+#define MSR_P4_FLAME_PERFCTR1 0x309
+#define MSR_P4_FLAME_PERFCTR2 0x30a
+#define MSR_P4_FLAME_PERFCTR3 0x30b
+#define MSR_P4_IQ_PERFCTR0 0x30c
+#define MSR_P4_IQ_PERFCTR1 0x30d
+#define MSR_P4_IQ_PERFCTR2 0x30e
+#define MSR_P4_IQ_PERFCTR3 0x30f
+#define MSR_P4_IQ_PERFCTR4 0x310
+#define MSR_P4_IQ_PERFCTR5 0x311
+#define MSR_P4_BPU_CCCR0 0x360
+#define MSR_P4_BPU_CCCR1 0x361
+#define MSR_P4_BPU_CCCR2 0x362
+#define MSR_P4_BPU_CCCR3 0x363
+#define MSR_P4_MS_CCCR0 0x364
+#define MSR_P4_MS_CCCR1 0x365
+#define MSR_P4_MS_CCCR2 0x366
+#define MSR_P4_MS_CCCR3 0x367
+#define MSR_P4_FLAME_CCCR0 0x368
+#define MSR_P4_FLAME_CCCR1 0x369
+#define MSR_P4_FLAME_CCCR2 0x36a
+#define MSR_P4_FLAME_CCCR3 0x36b
+#define MSR_P4_IQ_CCCR0 0x36c
+#define MSR_P4_IQ_CCCR1 0x36d
+#define MSR_P4_IQ_CCCR2 0x36e
+#define MSR_P4_IQ_CCCR3 0x36f
+#define MSR_P4_IQ_CCCR4 0x370
+#define MSR_P4_IQ_CCCR5 0x371
+#define MSR_P4_ALF_ESCR0 0x3ca
+#define MSR_P4_ALF_ESCR1 0x3cb
+#define MSR_P4_BPU_ESCR0 0x3b2
+#define MSR_P4_BPU_ESCR1 0x3b3
+#define MSR_P4_BSU_ESCR0 0x3a0
+#define MSR_P4_BSU_ESCR1 0x3a1
+#define MSR_P4_CRU_ESCR0 0x3b8
+#define MSR_P4_CRU_ESCR1 0x3b9
+#define MSR_P4_CRU_ESCR2 0x3cc
+#define MSR_P4_CRU_ESCR3 0x3cd
+#define MSR_P4_CRU_ESCR4 0x3e0
+#define MSR_P4_CRU_ESCR5 0x3e1
+#define MSR_P4_DAC_ESCR0 0x3a8
+#define MSR_P4_DAC_ESCR1 0x3a9
+#define MSR_P4_FIRM_ESCR0 0x3a4
+#define MSR_P4_FIRM_ESCR1 0x3a5
+#define MSR_P4_FLAME_ESCR0 0x3a6
+#define MSR_P4_FLAME_ESCR1 0x3a7
+#define MSR_P4_FSB_ESCR0 0x3a2
+#define MSR_P4_FSB_ESCR1 0x3a3
+#define MSR_P4_IQ_ESCR0 0x3ba
+#define MSR_P4_IQ_ESCR1 0x3bb
+#define MSR_P4_IS_ESCR0 0x3b4
+#define MSR_P4_IS_ESCR1 0x3b5
+#define MSR_P4_ITLB_ESCR0 0x3b6
+#define MSR_P4_ITLB_ESCR1 0x3b7
+#define MSR_P4_IX_ESCR0 0x3c8
+#define MSR_P4_IX_ESCR1 0x3c9
+#define MSR_P4_MOB_ESCR0 0x3aa
+#define MSR_P4_MOB_ESCR1 0x3ab
+#define MSR_P4_MS_ESCR0 0x3c0
+#define MSR_P4_MS_ESCR1 0x3c1
+#define MSR_P4_PMH_ESCR0 0x3ac
+#define MSR_P4_PMH_ESCR1 0x3ad
+#define MSR_P4_RAT_ESCR0 0x3bc
+#define MSR_P4_RAT_ESCR1 0x3bd
+#define MSR_P4_SAAT_ESCR0 0x3ae
+#define MSR_P4_SAAT_ESCR1 0x3af
+#define MSR_P4_SSU_ESCR0 0x3be
+#define MSR_P4_SSU_ESCR1 0x3bf /* guess: not defined in manual */
+#define MSR_P4_TBPU_ESCR0 0x3c2
+#define MSR_P4_TBPU_ESCR1 0x3c3
+#define MSR_P4_TC_ESCR0 0x3c4
+#define MSR_P4_TC_ESCR1 0x3c5
+#define MSR_P4_U2L_ESCR0 0x3b0
+#define MSR_P4_U2L_ESCR1 0x3b1
/* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
#define MSR_K7_EVNTSEL0 0xC0010000
diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h ./xen/include/asm-x86/nmi.h
--- ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h 1969-12-31 18:00:00 -06:00
+++ ./xen/include/asm-x86/nmi.h 2005-08-18 20:28:44 -05:00
@@ -0,0 +1,26 @@
+/*
+ * xen/include/asm-x86/nmi.h (derived from linux/include/asm-i386/nmi.h)
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+struct cpu_user_regs;
+
+/* NMI callback: receives the interrupted register frame and the cpu
+ * number; returns non-zero if it handled the NMI and 0 otherwise. */
+typedef int (*nmi_callback_t)(struct cpu_user_regs * regs, int cpu);
+
+/**
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set at a time; setting a new one replaces the previous one.
+ * The handler must return 1 if it handled the NMI and 0 if the
+ * default NMI handling should run.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+
+/**
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set, so that every NMI gets the
+ * default handling again.
+ */
+void unset_nmi_callback(void);
+
+#endif /* ASM_NMI_H */
diff -Naurp ../xen-unstable.hg-6251/xen/include/public/xen.h ./xen/include/public/xen.h
--- ../xen-unstable.hg-6251/xen/include/public/xen.h 2005-08-19 23:46:23 -05:00
+++ ./xen/include/public/xen.h 2005-08-19 20:34:10 -05:00
@@ -4,6 +4,10 @@
* Guest OS interface to Xen.
*
* Copyright (c) 2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
*/
#ifndef __XEN_PUBLIC_XEN_H__
@@ -59,6 +63,7 @@
#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
#define __HYPERVISOR_mmuext_op 26
#define __HYPERVISOR_acm_op 27
+#define __HYPERVISOR_pmc_op 28
/*
* VIRTUAL INTERRUPTS
@@ -72,7 +77,8 @@
#define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error. */
#define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error. */
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
-#define NR_VIRQS 7
+#define VIRQ_PMC_OVF 7 /* PMC Overflow */
+#define NR_VIRQS 8
/*
* MMU-UPDATE REQUESTS
@@ -239,6 +245,21 @@ struct mmuext_op {
#define VMASST_TYPE_writable_pagetables 2
#define MAX_VMASST_TYPE 2
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define PMC_INIT 0
+#define PMC_SET_ACTIVE 1
+#define PMC_SET_PASSIVE 2
+#define PMC_RESERVE_COUNTERS 3
+#define PMC_SETUP_EVENTS 4
+#define PMC_ENABLE_VIRQ 5
+#define PMC_START 6
+#define PMC_STOP 7
+#define PMC_DISABLE_VIRQ 8
+#define PMC_RELEASE_COUNTERS 9
+#define PMC_SHUTDOWN 10
+
#ifndef __ASSEMBLY__
typedef u16 domid_t;
@@ -291,6 +312,8 @@ typedef struct
/* Event channel endpoints per domain. */
#define NR_EVENT_CHANNELS 1024
+#define MAX_OPROF_EVENTS 32
+#define MAX_OPROF_DOMAINS 25
/*
* Per-VCPU information goes here. This will be cleaned up more when Xen
* actually supports multi-VCPU guests.
@@ -406,6 +429,21 @@ typedef struct shared_info {
u32 wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
arch_shared_info_t arch;
+
+ /* Oprofile structures */
+ u8 event_head;
+ u8 event_tail;
+ struct {
+ u64 eip;
+ u8 mode;
+ u8 event;
+ } event_log[MAX_OPROF_EVENTS];
+ u8 losing_samples;
+ u64 samples_lost;
+ u32 nmi_restarts;
+ u64 active_samples;
+ u64 passive_samples;
+ u64 other_samples;
} shared_info_t;
[-- Attachment #3: xenoprof-1.2-x86_64-linux.patch --]
[-- Type: text/plain, Size: 37710 bytes --]
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig 2005-08-22 20:17:51 -05:00
@@ -200,4 +200,6 @@ source "crypto/Kconfig"
source "lib/Kconfig"
+source "arch/xen/oprofile/Kconfig"
+
source "arch/xen/Kconfig.debug"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile 2005-08-22 20:17:51 -05:00
@@ -32,6 +32,8 @@ ifneq ($(KBUILD_SRC),)
$(Q)ln -fsn ../include/asm-$(XENARCH) include2/asm
endif
+drivers-$(CONFIG_OPROFILE) += arch/xen/oprofile/
+
include/.asm-ignore: include/asm
@rm -f include/.asm-ignore
@mv include/asm include/.asm-ignore
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 2005-08-22 20:17:51 -05:00
@@ -79,6 +79,12 @@ CONFIG_OBSOLETE_MODPARM=y
CONFIG_KMOD=y
#
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
# X86 Processor Configuration
#
CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 2005-08-22 20:17:51 -05:00
@@ -76,6 +76,12 @@ CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
#
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
# X86 Processor Configuration
#
CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile 2005-08-22 20:17:51 -05:00
@@ -84,7 +84,6 @@ core-y += arch/xen/i386/kernel/ \
drivers-$(CONFIG_MATH_EMULATION) += arch/i386/math-emu/
drivers-$(CONFIG_PCI) += arch/xen/i386/pci/
# must be linked after kernel/
-drivers-$(CONFIG_OPROFILE) += arch/i386/oprofile/
drivers-$(CONFIG_PM) += arch/i386/power/
# for clean
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c 2005-08-22 20:17:51 -05:00
@@ -44,11 +44,16 @@
#include <asm-xen/hypervisor.h>
#include <asm-xen/evtchn.h>
+int virq_to_phys(int virq);
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
EXPORT_SYMBOL(force_evtchn_callback);
EXPORT_SYMBOL(evtchn_do_upcall);
EXPORT_SYMBOL(bind_evtchn_to_irq);
EXPORT_SYMBOL(unbind_evtchn_from_irq);
+EXPORT_SYMBOL(virq_to_phys);
+EXPORT_SYMBOL(bind_virq_to_irq);
+EXPORT_SYMBOL(unbind_virq_from_irq);
#endif
/*
@@ -178,6 +183,15 @@ static int find_unbound_irq(void)
panic("No available IRQ to bind to: increase NR_IRQS!\n");
return irq;
+}
+
+/*
+ * Return the entry for virq in the calling CPU's virq_to_irq table, or -1
+ * if virq is outside [0, NR_VIRQS).  virq is signed, so the lower bound is
+ * checked as well to keep a negative value from indexing before the table.
+ */
+int virq_to_phys(int virq)
+{
+    int cpu = smp_processor_id();
+
+    if (virq < 0 || virq >= NR_VIRQS)
+        return -1;
+    return per_cpu(virq_to_irq,cpu)[virq];
}
int bind_virq_to_irq(int virq)
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig 1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig 2005-08-22 20:17:51 -05:00
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+ depends on EXPERIMENTAL
+
+config PROFILING
+ bool "Profiling support (EXPERIMENTAL)"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+
+config OPROFILE
+ tristate "OProfile system profiling (EXPERIMENTAL)"
+ depends on PROFILING
+ help
+ OProfile is a profiling system capable of profiling the
+ whole system, including the kernel, kernel modules, libraries,
+ and applications.
+
+ If unsure, say N.
+
+endmenu
+
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile 1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile 2005-08-22 20:17:51 -05:00
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) pmc.o
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h 1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h 2005-08-22 20:17:51 -05:00
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 8
+
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config { /* NOTE(review): pmc_setup() hands the array's address to Xen, so this layout is presumably ABI -- confirm */
+ unsigned long count; /* oprofilefs "count"; presumably events per sample -- TODO confirm */
+ unsigned long enabled; /* oprofilefs "enabled"; presumably non-zero turns the counter on -- TODO confirm */
+ unsigned long event; /* oprofilefs "event" */
+ unsigned long kernel; /* oprofilefs "kernel"; presumably count in kernel mode -- TODO confirm */
+ unsigned long user; /* oprofilefs "user"; presumably count in user mode -- TODO confirm */
+ unsigned long unit_mask; /* oprofilefs "unit_mask" */
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c 1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c 2005-08-22 20:17:51 -05:00
@@ -0,0 +1,323 @@
+/**
+ * @file pmc.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+
+#include "op_counter.h"
+
+static int pmc_start(void);
+static void pmc_stop(void);
+
+/* 0 == registered but off, 1 == registered and on */
+static int pmc_enabled = 0;
+static int num_events = 0;
+static int is_primary = 0;
+
+#ifdef CONFIG_PM
+/* sysdev suspend hook: calls pmc_stop() while profiling is on (pmc_enabled == 1); always returns 0. */
+static int pmc_suspend(struct sys_device *dev, u32 state)
+{
+ if (pmc_enabled == 1)
+ pmc_stop();
+ return 0;
+}
+
+/* sysdev resume hook: calls pmc_start() while pmc_enabled == 1; pmc_start()'s result is dropped and 0 returned. */
+static int pmc_resume(struct sys_device *dev)
+{
+ if (pmc_enabled == 1)
+ pmc_start();
+ return 0;
+}
+
+/* sysdev class named "oprofile" whose suspend/resume hooks are pmc_suspend()/pmc_resume(). */
+static struct sysdev_class oprofile_sysclass = {
+ set_kset_name("oprofile"),
+ .resume = pmc_resume,
+ .suspend = pmc_suspend,
+};
+
+/* The one device (id 0) of that class; registered by init_driverfs(). */
+static struct sys_device device_oprofile = {
+ .id = 0,
+ .cls = &oprofile_sysclass,
+};
+
+/* Register the sysdev class and, if that succeeded, the device; returns 0 or the first error. */
+static int __init init_driverfs(void)
+{
+ int error;
+ if (!(error = sysdev_class_register(&oprofile_sysclass)))
+ error = sysdev_register(&device_oprofile);
+ return error;
+}
+
+/* Undo init_driverfs(): unregister the device first, then its class. */
+static void __exit exit_driverfs(void)
+{
+ sysdev_unregister(&device_oprofile);
+ sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+
+/* Hand event_log[from..to-1] of the shared-info page to oprofile, counting each sample. */
+static void pmc_drain(shared_info_t *s, int from, int to)
+{
+	int i;
+
+	for (i = from; i < to; i++) {
+		oprofile_add_sample_xen(s->event_log[i].eip,
+			s->event_log[i].mode, s->event_log[i].event);
+		oprofile_samples++;
+	}
+}
+
+/*
+ * VIRQ_PMC_OVF handler: pass every entry of the shared-info event_log[]
+ * between event_tail and event_head to oprofile, then store head as the
+ * new event_tail and clear losing_samples.
+ */
+static irqreturn_t pmc_ovf_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	int head = s->event_head;
+	int tail = s->event_tail;
+
+	/* oprofile_add_sample will also handle samples from other domains */
+
+	if (tail > head) {
+		/* The log has wrapped: drain up to the end of the array, then restart at slot 0. */
+		pmc_drain(s, tail, MAX_OPROF_EVENTS);
+		tail = 0;
+	}
+	pmc_drain(s, tail, head);
+
+	/* Both drain passes stop at head, so head is the new consumer position. */
+	s->event_tail = head;
+	s->losing_samples = 0;
+
+	return IRQ_HANDLED;
+}
+
+extern int virq_to_phys(int virq);
+
+static int pmc_setup(void)
+{
+	/* oprofile "setup" hook: returns 0, or the request_irq() error after undoing the VIRQ binding. */
+	int ret;
+
+	/* Bind VIRQ_PMC_OVF to an IRQ and attach the overflow handler to it. */
+	ret = request_irq(bind_virq_to_irq(VIRQ_PMC_OVF),
+		pmc_ovf_interrupt, SA_INTERRUPT, "pmc_ovf", NULL);
+	if (ret)
+		goto unbind_virq;
+
+	if (is_primary) {
+		/* NOTE(review): failures of the hypercalls below are not reported to the caller. */
+		ret = HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, (u64)NULL, (u64)NULL);
+		ret = HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, (u64)&counter_config, (u64)num_events);
+	}
+
+	ret = HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, (u64)NULL, (u64)NULL);
+
+	pmc_enabled = 1;
+	return 0;
+
+unbind_virq:
+	/* request_irq() failed, so no handler is installed: only undo the binding, never free_irq(). */
+	unbind_virq_from_irq(VIRQ_PMC_OVF);
+
+	return ret;
+}
+
+static void pmc_shutdown(void)
+{
+ int ret;
+ pmc_enabled = 0;
+ /* oprofile "shutdown" hook: undoes pmc_setup() in reverse order; hypercall results are stored in ret but never checked. */
+ ret = HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, (u64)NULL, (u64)NULL);
+ //printk(KERN_INFO "pmc_shutdown: disable_virq: ret %d\n", ret);
+
+ if (is_primary) {
+ ret = HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, (u64)NULL, (u64)NULL);
+ //printk(KERN_INFO "pmc_shutdown: release_counters: ret %d\n", ret);
+ }
+ /* NOTE(review): virq_to_phys() reads the calling CPU's table; assumes this runs on the CPU that did the bind -- confirm */
+ free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL);
+ unbind_virq_from_irq(VIRQ_PMC_OVF);
+}
+
+static int pmc_start(void)
+{
+ int ret = 0; /* stays 0 when is_primary is clear, because no hypercall is made then */
+ if (is_primary)
+ ret = HYPERVISOR_pmc_op(PMC_START, (u64)NULL, (u64)NULL); /* result is returned to the caller */
+ //printk(KERN_INFO "pmc_start: ret %d\n", ret);
+ return ret;
+}
+
+static void pmc_stop(void)
+{
+	/* oprofile "stop" hook: issue PMC_STOP (only when is_primary) and log the sample counters. */
+	shared_info_t *s = HYPERVISOR_shared_info;
+	if (is_primary)
+		HYPERVISOR_pmc_op(PMC_STOP, (u64)NULL, (u64)NULL);
+	printk(KERN_INFO "pmc: oprofile samples %llu, active %llu, passive %llu, other %llu, buffering losses %llu, NMI restarted %u\n",
+		oprofile_samples, s->active_samples, s->passive_samples,
+		s->other_samples, s->samples_lost, s->nmi_restarts);	/* nmi_restarts is u32, hence %u */
+}
+
+static int pmc_set_active(int *active_domains, unsigned int adomains)
+{
+ int ret = 0; /* when is_primary is clear nothing is sent to Xen and 0 is returned */
+ if (is_primary)
+ ret = HYPERVISOR_pmc_op(PMC_SET_ACTIVE, /* passes the array address and its entry count */
+ (u64)active_domains, (u64)adomains);
+ return ret;
+}
+
+static int pmc_set_passive(int *passive_domains, unsigned int pdomains)
+{
+ int ret = 0; /* when is_primary is clear nothing is sent to Xen and 0 is returned */
+ if (is_primary)
+ ret = HYPERVISOR_pmc_op(PMC_SET_PASSIVE, /* passes the array address and its entry count */
+ (u64)passive_domains, (u64)pdomains);
+ return ret;
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int pmc_create_files(struct super_block * sb, struct dentry * root)
+{
+	/* Create one oprofilefs directory per counter, named by its index, holding its config files. */
+	unsigned int i, nr = num_events > 0 ? num_events : 0;
+
+	if (nr > OP_MAX_COUNTER)	/* never index past counter_config[], whatever count Xen reported */
+		nr = OP_MAX_COUNTER;
+	for (i = 0; i < nr; ++i) {
+		struct dentry * dir;
+		char buf[4];	/* one digit suffices for OP_MAX_COUNTER; keep headroom */
+		snprintf(buf, sizeof(buf), "%u", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+	}
+	return 0;
+}
+
+/* Hook table copied into *ops by oprofile_arch_init(), which also fills in cpu_type first. */
+struct oprofile_operations pmc_ops = {
+ .create_files = pmc_create_files,
+ .set_active = pmc_set_active,
+ .set_passive = pmc_set_passive,
+ .setup = pmc_setup,
+ .shutdown = pmc_shutdown,
+ .start = pmc_start,
+ .stop = pmc_stop
+};
+
+
+static void __init p4_init(void)
+{
+	/* Pick cpu_type for a family-0xf CPU; models above 3 are reported as unknown. */
+	if (current_cpu_data.x86_model > 3) {
+		pmc_ops.cpu_type = "type_unknown";
+		return;	/* without this the assignment below overwrote the result */
+	}
+	/* We always use a non-HT system because that gives us more events */
+	pmc_ops.cpu_type = "i386/p4";
+}
+
+
+static void __init ppro_init(void)
+{
+	__u8 cpu_model = current_cpu_data.x86_model;
+
+	/* Map the family-6 model to a cpu_type; the > 0xd case must be part of the chain or it is overwritten. */
+	if (cpu_model > 0xd) {
+		pmc_ops.cpu_type = "type_unknown";
+	} else if (cpu_model == 9) {
+		pmc_ops.cpu_type = "i386/p6_mobile";
+	} else if (cpu_model > 5) {
+		pmc_ops.cpu_type = "i386/piii";
+	} else if (cpu_model > 2) {
+		pmc_ops.cpu_type = "i386/pii";
+	} else {
+		pmc_ops.cpu_type = "i386/ppro";
+	}
+}
+
+/* in order to get driverfs right */
+static int using_pmc;
+
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+	/* Issue PMC_INIT (handed &num_events and &is_primary); on success pick cpu_type and publish pmc_ops. */
+	int ret;
+	printk(KERN_INFO "oprofile_arch_init\n");
+	ret = HYPERVISOR_pmc_op(PMC_INIT, (u64)&num_events, (u64)&is_primary);
+	if (!ret) {
+		__u8 vendor = current_cpu_data.x86_vendor;
+		__u8 family = current_cpu_data.x86;
+		if (vendor == X86_VENDOR_INTEL) {
+			switch (family) {
+			/* Pentium IV */
+			case 0xf:
+				p4_init();
+				break;
+			/* A P6-class processor */
+			case 6:
+				ppro_init();
+				break;
+			default:
+				pmc_ops.cpu_type = "type_unknown";
+			}
+		} else pmc_ops.cpu_type = "type_unknown";
+
+		init_driverfs();
+		using_pmc = 1;
+		*ops = pmc_ops;
+	}
+	printk (KERN_INFO "oprofile_arch_init: ret %d, events %d, is_primary %d\n", ret, num_events, is_primary);
+	return ret;
+}
+
+/* Teardown for oprofile_arch_init(): drop the sysdev entries if using_pmc was set, then send PMC_SHUTDOWN when is_primary. */
+void __exit oprofile_arch_exit(void)
+{
+ if (using_pmc)
+ exit_driverfs();
+
+ if (is_primary)
+ HYPERVISOR_pmc_op(PMC_SHUTDOWN, (u64)NULL, (u64)NULL);
+
+}
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile 2005-08-22 20:17:51 -05:00
@@ -69,7 +69,6 @@ libs-y += arch/x86_64/lib/
core-y += arch/xen/x86_64/kernel/ arch/xen/x86_64/mm/
core-$(CONFIG_IA32_EMULATION) += arch/xen/x86_64/ia32/
drivers-$(CONFIG_PCI) += arch/xen/x86_64/pci/
-drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
# for clean
obj- += kernel/ mm/ pci/
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c 2005-08-22 20:17:51 -05:00
@@ -6,6 +6,10 @@
*
* @author John Levon <levon@movementarian.org>
*
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
* This is the core of the buffer management. Each
* CPU buffer is processed and entered into the
* global event buffer. Such processing is necessary
@@ -265,13 +269,30 @@ static void add_cpu_switch(int i)
last_cookie = ~0UL;
}
-static void add_kernel_ctx_switch(unsigned int in_kernel)
+static void add_cpu_mode_switch(unsigned int cpu_mode)
{
add_event_entry(ESCAPE_CODE);
- if (in_kernel)
- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
- else
- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
+ switch (cpu_mode) /* choose the code that follows the ESCAPE_CODE emitted above */
+ {
+ case CPU_MODE_USER:
+ add_event_entry(USER_ENTER_SWITCH_CODE);
+ break;
+ case CPU_MODE_KERNEL:
+ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+ break;
+ case CPU_MODE_XEN:
+ add_event_entry(XEN_ENTER_SWITCH_CODE);
+ break;
+ default: /* NOTE(review): an unknown mode leaves a bare ESCAPE_CODE in the event buffer */
+ break;
+ }
+}
+/* Emit ESCAPE_CODE, DOMAIN_SWITCH_CODE and the domain id as three event-buffer entries. */
+static void add_dom_switch(int domain_id)
+{
+ add_event_entry(ESCAPE_CODE);
+ add_event_entry(DOMAIN_SWITCH_CODE);
+ add_event_entry(domain_id);
}
static void
@@ -337,10 +358,9 @@ static int add_us_sample(struct mm_struc
* sample is converted into a persistent dentry/offset pair
* for later lookup from userspace.
*/
-static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
+static int add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
{
- if (in_kernel) {
+ if (cpu_mode >= CPU_MODE_KERNEL) {
add_sample_entry(s->eip, s->event);
return 1;
} else if (mm) {
@@ -374,6 +394,11 @@ static inline int is_code(unsigned long
{
return val == ESCAPE_CODE;
}
+/* True when val (sync_buffer() passes a sample's eip) equals the DOMAIN_SWITCH_ESCAPE_CODE marker. */
+static inline int is_dom_switch(unsigned long val)
+{
+ return val == DOMAIN_SWITCH_ESCAPE_CODE;
+}
/* "acquire" as many cpu buffer slots as we can */
@@ -489,10 +514,11 @@ void sync_buffer(int cpu)
struct mm_struct *mm = NULL;
struct task_struct * new;
unsigned long cookie = 0;
- int in_kernel = 1;
+ int cpu_mode = 1;
unsigned int i;
sync_buffer_state state = sb_buffer_start;
unsigned long available;
+ int domain_switch = 0;
down(&buffer_sem);
@@ -506,12 +532,12 @@ void sync_buffer(int cpu)
struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
if (is_code(s->eip)) {
- if (s->event <= CPU_IS_KERNEL) {
+ if (s->event <= CPU_MODE_MAX) {
/* kernel/userspace switch */
- in_kernel = s->event;
+ cpu_mode = s->event;
if (state == sb_buffer_start)
state = sb_sample_start;
- add_kernel_ctx_switch(s->event);
+ add_cpu_mode_switch(s->event);
} else if (s->event == CPU_TRACE_BEGIN) {
state = sb_bt_start;
add_trace_begin();
@@ -528,11 +554,23 @@ void sync_buffer(int cpu)
add_user_ctx_switch(new, cookie);
}
} else {
- if (state >= sb_bt_start &&
- !add_sample(mm, s, in_kernel)) {
- if (state == sb_bt_start) {
- state = sb_bt_ignore;
- atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+ if (is_dom_switch(s->eip)) {
+ add_dom_switch((int)(s->event));
+ domain_switch = 1;
+ }
+ else {
+ if (domain_switch) {
+ add_sample_entry (s->eip, s->event);
+ domain_switch = 0;
+ }
+ else {
+ if (state >= sb_bt_start &&
+ !add_sample(mm, s, cpu_mode)) {
+ if (state == sb_bt_start) {
+ state = sb_bt_ignore;
+ atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+ }
+ }
}
}
}
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c 2005-08-22 20:17:51 -05:00
@@ -6,6 +6,10 @@
*
* @author John Levon <levon@movementarian.org>
*
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
* Each CPU has a local buffer that stores PC value/event
* pairs. We also log context switches when we notice them.
* Eventually each CPU's buffer is processed into the global
@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
goto fail;
b->last_task = NULL;
- b->last_is_kernel = -1;
+ b->last_cpu_mode = -1;
b->tracing = 0;
b->buffer_size = buffer_size;
b->tail_pos = 0;
@@ -117,7 +121,7 @@ void cpu_buffer_reset(struct oprofile_cp
* collected will populate the buffer with proper
* values to initialize the buffer
*/
- cpu_buf->last_is_kernel = -1;
+ cpu_buf->last_cpu_mode = -1;
cpu_buf->last_task = NULL;
}
@@ -180,7 +184,7 @@ add_code(struct oprofile_cpu_buffer * bu
* events whenever is_kernel changes
*/
static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
- int is_kernel, unsigned long event)
+ int cpu_mode, unsigned long event)
{
struct task_struct * task;
@@ -191,24 +195,39 @@ static int log_sample(struct oprofile_cp
return 0;
}
- is_kernel = !!is_kernel;
+ // Ensure a valid cpu mode
+ if (cpu_mode > CPU_MODE_XEN)
+ return 0;
task = current;
- /* notice a switch from user->kernel or vice versa */
- if (cpu_buf->last_is_kernel != is_kernel) {
- cpu_buf->last_is_kernel = is_kernel;
- add_code(cpu_buf, is_kernel);
- }
- /* notice a task switch */
- if (cpu_buf->last_task != task) {
- cpu_buf->last_task = task;
- add_code(cpu_buf, (unsigned long)task);
+ /* We treat samples from other domains in a special manner:
+ each sample is preceded by a record with eip equal to ~1UL.
+ This record is non-sticky i.e. it holds only for the following
+ sample. The event field of this record stores the domain id.*/
+ if (pc == DOMAIN_SWITCH_ESCAPE_CODE) {
+ add_sample(cpu_buf, pc, event);
+ return 1;
+ } else {
+ /* notice a switch from user->kernel or vice versa */
+ if (cpu_buf->last_cpu_mode != cpu_mode) {
+ cpu_buf->last_cpu_mode = cpu_mode;
+ add_code(cpu_buf, cpu_mode);
+ }
+
+ /* notice a task switch */
+ if (cpu_buf->last_task != task) {
+ cpu_buf->last_task = task;
+ add_code(cpu_buf, (unsigned long)task);
+ }
+
+ /* Note: at this point, we lose the cpu_mode of a sample
+ if it is from another domain */
+
+ add_sample(cpu_buf, pc, event);
+ return 1;
}
-
- add_sample(cpu_buf, pc, event);
- return 1;
}
static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
@@ -229,6 +248,14 @@ static void oprofile_end_trace(struct op
cpu_buf->tracing = 0;
}
+void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode,
+ unsigned long event)
+{
+ /* Log one (eip, cpu_mode, event) sample into the calling CPU's buffer; log_sample()'s result is ignored. */
+ struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
+ log_sample(cpu_buf, eip, cpu_mode, event);
+
+}
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h 2005-08-22 20:17:51 -05:00
@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
volatile unsigned long tail_pos;
unsigned long buffer_size;
struct task_struct * last_task;
- int last_is_kernel;
+ int last_cpu_mode;
int tracing;
struct op_sample * buffer;
unsigned long sample_received;
@@ -51,7 +51,14 @@ extern struct oprofile_cpu_buffer cpu_bu
void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
/* transient events for the CPU buffer -> event buffer */
-#define CPU_IS_KERNEL 1
-#define CPU_TRACE_BEGIN 2
+#define CPU_MODE_USER 0
+#define CPU_MODE_KERNEL 1
+#define CPU_MODE_XEN 2
+#define CPU_MODE_MAX 2
+#define CPU_TRACE_BEGIN 3
+/* special escape code for indicating next sample in the CPU */
+/* buffer is from another Xen domain */
+#define DOMAIN_SWITCH_ESCAPE_CODE ~1UL
+
#endif /* OPROFILE_CPU_BUFFER_H */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c 2005-08-22 20:17:51 -05:00
@@ -56,6 +56,7 @@ void add_event_entry(unsigned long value
/* Wake up the waiting process if any. This happens
* on "echo 0 >/dev/oprofile/enable" so the daemon
* processes the data remaining in the event buffer.
+ * also called on echo 1 > /dev/oprofile/dump
*/
void wake_up_buffer_waiter(void)
{
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h 2005-08-22 20:17:51 -05:00
@@ -5,6 +5,10 @@
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
*/
#ifndef EVENT_BUFFER_H
@@ -29,11 +33,13 @@ void wake_up_buffer_waiter(void);
#define CPU_SWITCH_CODE 2
#define COOKIE_SWITCH_CODE 3
#define KERNEL_ENTER_SWITCH_CODE 4
-#define KERNEL_EXIT_SWITCH_CODE 5
+#define USER_ENTER_SWITCH_CODE 5
#define MODULE_LOADED_CODE 6
#define CTX_TGID_CODE 7
#define TRACE_BEGIN_CODE 8
#define TRACE_END_CODE 9
+#define XEN_ENTER_SWITCH_CODE 10
+#define DOMAIN_SWITCH_CODE 11
/* add data to the event buffer */
void add_event_entry(unsigned long data);
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c 2005-08-22 20:17:51 -05:00
@@ -5,6 +5,10 @@
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
*/
#include <linux/kernel.h>
@@ -32,6 +36,25 @@ static DECLARE_MUTEX(start_sem);
1 - use the timer int mechanism regardless
*/
static int timer = 0;
+
+extern unsigned int adomains, pdomains;
+extern int active_domains[MAX_OPROF_DOMAINS], passive_domains[MAX_OPROF_DOMAINS];
+/* Pass active_domains[]/adomains to the driver's set_active hook; -EINVAL when the driver has none. */
+int oprofile_set_active(void)
+{
+ if (oprofile_ops.set_active)
+ return oprofile_ops.set_active(active_domains, adomains);
+
+ return -EINVAL;
+}
+/* Pass passive_domains[]/pdomains to the driver's set_passive hook; -EINVAL when the driver has none. */
+int oprofile_set_passive(void)
+{
+ if (oprofile_ops.set_passive)
+ return oprofile_ops.set_passive(passive_domains, pdomains);
+
+ return -EINVAL;
+}
int oprofile_setup(void)
{
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c 2005-08-22 20:17:51 -05:00
@@ -5,10 +5,16 @@
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
*/
#include <linux/fs.h>
#include <linux/oprofile.h>
+#include <linux/pagemap.h>
+#include <linux/ctype.h>
#include "event_buffer.h"
#include "oprofile_stats.h"
@@ -117,11 +123,140 @@ static ssize_t dump_write(struct file *
static struct file_operations dump_fops = {
.write = dump_write,
};
-
+
+#define TMPBUFSIZE 50
+
+unsigned int adomains = 0;	/* number of valid entries in active_domains[] */
+int active_domains[MAX_OPROF_DOMAINS];	/* int, to match the extern in oprof.c and the set_active(int *) hook */
+
+extern int oprofile_set_active(void);
+
+static ssize_t adomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+	/* Parse the domain-id list written by user space into active_domains[] and apply it with oprofile_set_active(). */
+	char tmpbuf[TMPBUFSIZE];
+	char *startp = tmpbuf;
+	char *endp = tmpbuf;
+	int i;
+	long val;
+
+	if (*offset)
+		return -EINVAL;
+	if (!count)
+		return 0;
+	if (count > TMPBUFSIZE - 1)
+		return -EINVAL;
+	/* Zero-fill first so the copied text is always NUL-terminated. */
+	memset(tmpbuf, 0x0, TMPBUFSIZE);
+	if (copy_from_user(tmpbuf, buf, count))
+		return -EFAULT;
+	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+		active_domains[i] = -1;
+	adomains = 0;
+
+	while (1) {
+		val = simple_strtol(startp, &endp, 0);
+		if (endp == startp)
+			break;
+		/* Skip separators; blanks count too, so the "id id " text produced by a read can be written back. */
+		while (ispunct(*endp) || isspace(*endp))
+			endp++;
+		active_domains[adomains++] = val;
+		if (adomains >= MAX_OPROF_DOMAINS)
+			break;
+		startp = endp;
+	}
+	if (oprofile_set_active())
+		return -EINVAL;
+	return count;
+}
+
+static ssize_t adomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+	/* Print active_domains[] as "id id ...\n". Worst case: MAX_OPROF_DOMAINS ids of 10 digits + blank, then "\n" and NUL. */
+	char tmpbuf[MAX_OPROF_DOMAINS * 11 + 2];
+	size_t len = 0;
+	unsigned int i;
+	for (i = 0; i < adomains && i < MAX_OPROF_DOMAINS; i++)	/* bounded, so len never reaches sizeof(tmpbuf) */
+		len += snprintf(tmpbuf + len, sizeof(tmpbuf) - len, "%u ", (unsigned int)active_domains[i]);
+	len += snprintf(tmpbuf + len, sizeof(tmpbuf) - len, "\n");
+	return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len);
+}
+
+
+static struct file_operations active_domain_ops = {
+ .read = adomain_read,
+ .write = adomain_write,
+};
+
+unsigned int pdomains = 0;	/* number of valid entries in passive_domains[] */
+int passive_domains[MAX_OPROF_DOMAINS];	/* int, to match the extern in oprof.c and the set_passive(int *) hook */
+
+extern int oprofile_set_passive(void);
+
+static ssize_t pdomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+	/* Parse the domain-id list written by user space into passive_domains[] and apply it with oprofile_set_passive(). */
+	char tmpbuf[TMPBUFSIZE];
+	char *startp = tmpbuf;
+	char *endp = tmpbuf;
+	int i;
+	long val;
+
+	if (*offset)
+		return -EINVAL;
+	if (!count)
+		return 0;
+	if (count > TMPBUFSIZE - 1)
+		return -EINVAL;
+	/* Zero-fill first so the copied text is always NUL-terminated. */
+	memset(tmpbuf, 0x0, TMPBUFSIZE);
+	if (copy_from_user(tmpbuf, buf, count))
+		return -EFAULT;
+	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+		passive_domains[i] = -1;
+	pdomains = 0;
+
+	while (1) {
+		val = simple_strtol(startp, &endp, 0);
+		if (endp == startp)
+			break;
+		/* Skip separators; blanks count too, so the "id id " text produced by a read can be written back. */
+		while (ispunct(*endp) || isspace(*endp))
+			endp++;
+		passive_domains[pdomains++] = val;
+		if (pdomains >= MAX_OPROF_DOMAINS)
+			break;
+		startp = endp;
+	}
+	if (oprofile_set_passive())
+		return -EINVAL;
+	return count;
+}
+
+static ssize_t pdomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+	/* Print passive_domains[] as "id id ...\n". Worst case: MAX_OPROF_DOMAINS ids of 10 digits + blank, then "\n" and NUL. */
+	char tmpbuf[MAX_OPROF_DOMAINS * 11 + 2];
+	size_t len = 0;
+	unsigned int i;
+	for (i = 0; i < pdomains && i < MAX_OPROF_DOMAINS; i++)	/* bounded, so len never reaches sizeof(tmpbuf) */
+		len += snprintf(tmpbuf + len, sizeof(tmpbuf) - len, "%u ", (unsigned int)passive_domains[i]);
+	len += snprintf(tmpbuf + len, sizeof(tmpbuf) - len, "\n");
+	return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len);
+}
+
+static struct file_operations passive_domain_ops = {
+ .read = pdomain_read,
+ .write = pdomain_write,
+};
+
void oprofile_create_files(struct super_block * sb, struct dentry * root)
{
oprofilefs_create_file(sb, root, "enable", &enable_fops);
oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
+ oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+ oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h 2005-08-22 19:43:16 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h 2005-08-22 20:20:01 -05:00
@@ -576,4 +576,21 @@ HYPERVISOR_vcpu_pickle(
return ret;
}
+
+static inline int
+HYPERVISOR_pmc_op(
+ int op, unsigned int arg1, unsigned int arg2) /* NOTE(review): pmc.c passes (u64) values; they narrow to 32 bits here */
+{
+ int ret;
+ unsigned long ign1, ign2, ign3; /* dummy outputs that the ebx/ecx/edx inputs are tied to */
+ /* eax = __HYPERVISOR_pmc_op, ebx = op, ecx = arg1, edx = arg2; the result comes back in eax */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a"(ret), "=b"(ign1), "=c"(ign2), "=d"(ign3)
+ : "0"(__HYPERVISOR_pmc_op), "1"(op), "2"(arg1), "3"(arg2)
+ : "memory" );
+
+ return ret;
+}
+
#endif /* __HYPERCALL_H__ */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h 2005-08-22 19:43:16 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h 2005-08-22 20:17:51 -05:00
@@ -519,4 +519,19 @@ HYPERVISOR_vcpu_pickle(
return ret;
}
+static inline int
+HYPERVISOR_pmc_op(
+ int op, u64 arg1, u64 arg2)
+{
+ int ret;
+ /* rax = __HYPERVISOR_pmc_op, rdi = op, rsi = arg1, rdx = arg2; the result comes back in rax */
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a"(ret)
+ : "0"(__HYPERVISOR_pmc_op), "D"(op), "S"(arg1), "d"(arg2)
+ : __syscall_clobber );
+
+ return ret;
+}
+
#endif /* __HYPERCALL_H__ */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h 2005-08-22 19:43:14 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h 2005-08-22 20:17:51 -05:00
@@ -4,6 +4,10 @@
* Guest OS interface to Xen.
*
* Copyright (c) 2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
*/
#ifndef __XEN_PUBLIC_XEN_H__
@@ -59,6 +63,7 @@
#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
#define __HYPERVISOR_mmuext_op 26
#define __HYPERVISOR_acm_op 27
+#define __HYPERVISOR_pmc_op 28
/*
* VIRTUAL INTERRUPTS
@@ -72,7 +77,8 @@
#define VIRQ_PARITY_ERR 4 /* (DOM0) NMI parity error. */
#define VIRQ_IO_ERR 5 /* (DOM0) NMI I/O error. */
#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */
-#define NR_VIRQS 7
+#define VIRQ_PMC_OVF 7 /* PMC Overflow */
+#define NR_VIRQS 8
/*
* MMU-UPDATE REQUESTS
@@ -240,6 +246,21 @@ struct mmuext_op {
#define VMASST_TYPE_writable_pagetables 2
#define MAX_VMASST_TYPE 2
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define PMC_INIT 0
+#define PMC_SET_ACTIVE 1
+#define PMC_SET_PASSIVE 2
+#define PMC_RESERVE_COUNTERS 3
+#define PMC_SETUP_EVENTS 4
+#define PMC_ENABLE_VIRQ 5
+#define PMC_START 6
+#define PMC_STOP 7
+#define PMC_DISABLE_VIRQ 8
+#define PMC_RELEASE_COUNTERS 9
+#define PMC_SHUTDOWN 10
+
#ifndef __ASSEMBLY__
typedef u16 domid_t;
@@ -292,6 +313,8 @@ typedef struct
/* Event channel endpoints per domain. */
#define NR_EVENT_CHANNELS 1024
+#define MAX_OPROF_EVENTS 32
+#define MAX_OPROF_DOMAINS 25
/*
* Per-VCPU information goes here. This will be cleaned up more when Xen
* actually supports multi-VCPU guests.
@@ -407,6 +430,21 @@ typedef struct shared_info {
u32 wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
arch_shared_info_t arch;
+
+ /* Oprofile structures */
+ u8 event_head;
+ u8 event_tail;
+ struct {
+ u64 eip;
+ u8 mode;
+ u8 event;
+ } event_log[MAX_OPROF_EVENTS];
+ u8 losing_samples;
+ u64 samples_lost;
+ u32 nmi_restarts;
+ u64 active_samples;
+ u64 passive_samples;
+ u64 other_samples;
} shared_info_t;
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h 2005-08-22 20:17:51 -05:00
@@ -8,6 +8,10 @@
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
*/
#ifndef OPROFILE_H
@@ -27,6 +31,10 @@ struct oprofile_operations {
/* create any necessary configuration files in the oprofile fs.
* Optional. */
int (*create_files)(struct super_block * sb, struct dentry * root);
+ /* setup active domains with Xen */
+ int (*set_active)(int *active_domains, unsigned int adomains);
+ /* setup passive domains with Xen */
+ int (*set_passive)(int *passive_domains, unsigned int pdomains);
/* Do any necessary interrupt setup. Optional. */
int (*setup)(void);
/* Do any necessary interrupt shutdown. Optional. */
@@ -60,6 +68,15 @@ void oprofile_arch_exit(void);
* smp_processor_id() as cpu.
*/
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
+
+/**
+ * Alternative function to add a sample for Xen.
+ * It would be better to combine both functions into one, but this would
+ * require restoring the cpu_mode parameter (formerly is_kernel) to
+ * oprofile_add_sample() (Xen is the best place to determine cpu_mode).
+ */
+extern void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode,
+ unsigned long event);
/* Use this instead when the PC value is not from the regs. Doesn't
* backtrace. */
[-- Attachment #4: Type: text/plain, Size: 138 bytes --]
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel
next reply other threads:[~2005-08-22 15:54 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-08-22 15:54 Andrew Theurer [this message]
-- strict thread matches above, loose matches on Subject: below --
2005-08-22 16:43 [PATCH] xenoprofile x86_64 Santos, Jose Renato G
2005-08-24 21:54 ` Andrew Theurer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4309F548.5020002@us.ibm.com \
--to=habanero@us.ibm.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.