All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] xenoprofile x86_64
@ 2005-08-22 15:54 Andrew Theurer
  0 siblings, 0 replies; 3+ messages in thread
From: Andrew Theurer @ 2005-08-22 15:54 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 573 bytes --]

Attached are patches for xenoprofile on x86_64. These are not 
"production ready", but they do work on EM64T so far. I have not added 
support for Opteron just yet (but will very soon). I wanted to get these 
out ASAP in case anyone wanted to try them. There are not too many 
changes from Renato's patches, mainly use of KERNEL_MODE instead of 
RING_1, u64's here and there, and new x86_64 specific files. I have not 
tested these patches on i386 (some changes needed). These should apply 
on changeset 6315.

-Andrew

Signed-off-by: Andrew Theurer <habanero@us.ibm.com>


[-- Attachment #2: xenoprof-1.2-x86_64-xen.patch --]
[-- Type: text/plain, Size: 61825 bytes --]

diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/Makefile ./xen/arch/x86/Makefile
--- ../xen-unstable.hg-6251/xen/arch/x86/Makefile	2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/Makefile	2005-08-18 20:28:44 -05:00
@@ -33,7 +33,10 @@ ifneq ($(crash_debug),y)
 OBJS := $(patsubst cdb%.o,,$(OBJS))
 endif
 
+OBJS += oprofile/oprofile.o
+
 default: $(TARGET)
+	$(MAKE) -C oprofile
 
 $(TARGET): $(TARGET)-syms boot/mkelf32
 	./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000
@@ -60,6 +63,9 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off
 boot/mkelf32: boot/mkelf32.c
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
 
+oprofile/oprofile.o:
+	$(MAKE) -C oprofile
+
 clean:
 	rm -f *.o *.s *~ core boot/*.o boot/*~ boot/core boot/mkelf32
 	rm -f x86_32/*.o x86_32/*~ x86_32/core
@@ -68,5 +74,6 @@ clean:
 	rm -f acpi/*.o acpi/*~ acpi/core
 	rm -f genapic/*.o genapic/*~ genapic/core
 	rm -f cpu/*.o cpu/*~ cpu/core
+	rm -f oprofile/*.o
 
 .PHONY: default clean
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/nmi.c ./xen/arch/x86/nmi.c
--- ../xen-unstable.hg-6251/xen/arch/x86/nmi.c	2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/nmi.c	2005-08-18 20:28:44 -05:00
@@ -5,6 +5,10 @@
  *
  *  Started by Ingo Molnar <mingo@redhat.com>
  *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  *  Fixes:
  *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
  *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
@@ -35,6 +39,28 @@ static unsigned int nmi_p4_cccr_val;
 static struct ac_timer nmi_timer[NR_CPUS];
 static unsigned int nmi_timer_ticks[NR_CPUS];
 
+/*
+ * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
+ * - it may be reserved by some other driver, or not
+ * - when not reserved by some other driver, it may be used for
+ *   the NMI watchdog, or not
+ *
+ * This is maintained separately from nmi_active because the NMI
+ * watchdog may also be driven from the I/O APIC timer.
+ */
+static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int lapic_nmi_owner;
+#define LAPIC_NMI_WATCHDOG      (1<<0)
+#define LAPIC_NMI_RESERVED      (1<<1)
+                                                                                                             
+/* nmi_active:
+ * +1: the lapic NMI watchdog is active, but can be disabled
+ *  0: the lapic NMI watchdog has not been set up, and cannot
+ *     be enabled
+ * -1: the lapic NMI watchdog is disabled, but can be enabled
+ */
+int nmi_active;
+
 #define K7_EVNTSEL_ENABLE	(1 << 22)
 #define K7_EVNTSEL_INT		(1 << 20)
 #define K7_EVNTSEL_OS		(1 << 17)
@@ -66,8 +92,6 @@ static unsigned int nmi_timer_ticks[NR_C
  * max threshold. [IA32-Vol3, Section 14.9.9] 
  */
 #define MSR_P4_IQ_COUNTER0	0x30C
-#define MSR_P4_IQ_CCCR0		0x36C
-#define MSR_P4_CRU_ESCR0	0x3B8 /* ESCR no. 4 */
 #define P4_NMI_CRU_ESCR0	P4_ESCR_EVENT_SELECT(0x3F)
 #define P4_NMI_IQ_CCCR0	\
     (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
@@ -124,6 +148,70 @@ static inline void nmi_pm_init(void) { }
  * Original code written by Keith Owens.
  */
 
+static void disable_lapic_nmi_watchdog(void)
+{
+        if (nmi_active <= 0)
+                return;
+        switch (boot_cpu_data.x86_vendor) {
+        case X86_VENDOR_AMD:
+                wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+                break;
+        case X86_VENDOR_INTEL:
+                switch (boot_cpu_data.x86) {
+                case 6:
+                        wrmsr(MSR_P6_EVNTSEL0, 0, 0);
+                        break;
+                case 15:
+			if ( (smp_num_siblings <= 1) ||
+			     ( (smp_processor_id() % smp_num_siblings) == 0) )
+			{
+                        	wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
+	                        wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+			} else {
+				wrmsr(MSR_P4_IQ_CCCR1, 0, 0);	
+			}
+                        break;
+                }
+                break;
+        }
+        nmi_active = -1;
+        /* tell do_nmi() and others that we're not active any more */
+        nmi_watchdog = 0;
+}
+
+static void enable_lapic_nmi_watchdog(void)
+{
+        if (nmi_active >= 0)
+                return;         /* only a disabled (-1) watchdog is re-armed */
+        nmi_watchdog = NMI_LOCAL_APIC;
+        setup_apic_nmi_watchdog();
+}
+
+int reserve_lapic_nmi(void)
+{
+        unsigned int prev;      /* ownership bits seen before this call */
+        spin_lock(&lapic_nmi_owner_lock);
+        prev = lapic_nmi_owner;
+        lapic_nmi_owner = prev | LAPIC_NMI_RESERVED;
+        spin_unlock(&lapic_nmi_owner_lock);
+        if (prev & LAPIC_NMI_RESERVED)
+                return -EBUSY;  /* the reservation was already held */
+        if (prev & LAPIC_NMI_WATCHDOG)
+                disable_lapic_nmi_watchdog();
+        return 0;
+}
+
+void release_lapic_nmi(void)
+{
+        unsigned int remaining; /* ownership bits left after the release */
+        spin_lock(&lapic_nmi_owner_lock);
+        lapic_nmi_owner &= ~LAPIC_NMI_RESERVED;
+        remaining = lapic_nmi_owner;
+        spin_unlock(&lapic_nmi_owner_lock);
+        if (remaining & LAPIC_NMI_WATCHDOG)
+                enable_lapic_nmi_watchdog();
+}
+
 static void __pminit clear_msr_range(unsigned int base, unsigned int n)
 {
     unsigned int i;
@@ -241,6 +329,9 @@ void __pminit setup_apic_nmi_watchdog(vo
 
     init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
 
+    lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
+    nmi_active = 1;
+
     nmi_pm_init();
 }
 
@@ -337,3 +428,7 @@ void nmi_watchdog_tick(struct cpu_user_r
         wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
     }
 }
+
+EXPORT_SYMBOL(reserve_lapic_nmi);
+EXPORT_SYMBOL(release_lapic_nmi);
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile ./xen/arch/x86/oprofile/Makefile
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile	1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/Makefile	2005-08-18 20:28:44 -05:00
@@ -0,0 +1,9 @@
+
+include $(BASEDIR)/Rules.mk
+                                     
+default: $(OBJS) 
+	$(LD) $(LDFLAGS) -r -o oprofile.o $(OBJS)
+
+%.o: %.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) -c $< -o $@
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c ./xen/arch/x86/oprofile/nmi_int.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c	1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/nmi_int.c	2005-08-19 19:32:01 -05:00
@@ -0,0 +1,444 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/event.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <public/xen.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+#include <xen/delay.h>
+ 
+#include "op_counter.h"
+#include "op_x86_model.h"
+ 
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+
+#define VIRQ_BITMASK_SIZE	(MAX_OPROF_DOMAINS/32 + 1)
+
+extern int active_domains[MAX_OPROF_DOMAINS];
+extern unsigned int adomains;
+
+extern struct domain * primary_profiler;
+extern struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+extern int is_active(struct domain *d);
+extern int active_id(struct domain *d);
+extern int is_passive(struct domain *d);
+extern int is_profiled(struct domain *d);
+
+
+int nmi_profiling_started = 0;
+
+int active_virq_count = 0;
+int passive_virq_count = 0;
+int other_virq_count = 0;
+int other_id = -1;
+int xen_count = 0;
+int dom_count = 0; 
+int ovf = 0;
+
+int nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+	int xen_mode = 0;
+
+	ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
+	xen_mode = RING_0(regs);
+	if (ovf) {
+		if (xen_mode)
+			xen_count++;
+		else
+			dom_count++;
+
+		if (is_active(current->domain)) {
+		/* This is slightly incorrect. If we do not deliver 
+			OVF virtual interrupts in a synchronous 
+			manner, a process switch may happen in the domain 
+			between the point the sample was collected and 
+			the point at which a VIRQ was delivered. However, 
+			it is not safe to call send_guest_virq from this 
+			NMI context, it may lead to a deadlock since NMIs are 
+			unmaskable. One optimization that we can do is 
+			that if the sample occurs while domain code is 
+			running, we know that it is safe to call
+			send_guest_virq, since we know no Xen code 
+			is running at that time.
+			However, this may distort the sample distribution,
+			because we may lose more Xen mode samples.*/
+			active_virq_count++;
+			if (!xen_mode) {
+				send_guest_virq(current, VIRQ_PMC_OVF);
+				clear_bit(active_id(current->domain), &virq_ovf_pending[0]);
+			} else 
+				set_bit(active_id(current->domain), &virq_ovf_pending[0]);
+			primary_profiler->shared_info->active_samples++;
+		}
+		else if (is_passive(current->domain)) {
+			set_bit(active_id(primary_profiler), &virq_ovf_pending[0]);
+			passive_virq_count++;
+			primary_profiler->shared_info->passive_samples++;
+		}
+		else {
+			other_virq_count++;
+			other_id = current->domain->domain_id;
+			primary_profiler->shared_info->other_samples++;
+		}
+	}
+	return 1;
+}
+
+static void free_msrs(void)
+{
+	struct op_msrs *slot;	/* walks all NR_CPUS entries of cpu_msrs */
+	for (slot = cpu_msrs; slot < cpu_msrs + NR_CPUS; slot++) {
+		xfree(slot->counters);
+		slot->counters = NULL;
+		xfree(slot->controls);
+		slot->controls = NULL;
+	}
+}
+ 
+static int allocate_msrs(void)
+{
+	/* Table sizes follow from the selected model; returns 1 or 0. */
+	size_t const ctr_bytes = sizeof(struct op_msr) * model->num_counters;
+	size_t const ctl_bytes = sizeof(struct op_msr) * model->num_controls;
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct op_msrs *slot = &cpu_msrs[cpu];
+
+		/* CPUs whose bit is clear in cpu_online_map get no tables. */
+		if (!test_bit(cpu, &cpu_online_map))
+			continue;
+		slot->counters = xmalloc_bytes(ctr_bytes);
+		if (slot->counters == NULL)
+			goto fail;
+
+		slot->controls = xmalloc_bytes(ctl_bytes);
+		if (slot->controls == NULL)
+			goto fail;
+	}
+	return 1;
+
+fail:
+	/* Release whatever was allocated before the failure. */
+	free_msrs();
+	return 0;
+}
+
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrs; ++i) {
+		rdmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrls; ++i) {
+		rdmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+}
+
+static void nmi_save_registers(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	model->fill_in_addresses(msrs);
+	nmi_cpu_save_registers(msrs);
+}
+
+int nmi_reserve_counters(void)
+{
+	if (!allocate_msrs())
+		return -ENOMEM;
+
+	/* Tread carefully here.
+	 * We need to be careful to install our NMI handler
+	 * without actually triggering any NMIs as this will
+	 * break the core code horrifically.
+	 */
+	/* TODO: does this reservation need to be done on all CPUs? */
+	if (reserve_lapic_nmi() < 0) {
+		free_msrs();
+		return -EBUSY;
+	}
+	/* Save and setup must be serialized for HT because the two
+	 * operations work on distinct subsets of the msrs.
+	 */
+	on_each_cpu(nmi_save_registers, NULL, 0, 1);
+	return 0;
+}
+
+static void nmi_cpu_setup(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	model->setup_ctrs(msrs);
+}
+
+int nmi_setup_events(void)
+{
+	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+	return 0;
+}
+
+int nmi_enable_virq(void)
+{
+	set_nmi_callback(nmi_callback);	/* register nmi_callback() */
+	return 0;			/* always reports success */
+}
+
+static void nmi_cpu_start(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs const * msrs = &cpu_msrs[cpu];
+	saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	model->start(msrs);
+}
+
+int nmi_start(void)
+{
+	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+	nmi_profiling_started = 1;
+	return 0;
+}
+
+static void nmi_cpu_stop(void * dummy)
+{
+	unsigned int v;
+	int cpu = smp_processor_id();
+	struct op_msrs const * msrs = &cpu_msrs[cpu];
+	model->stop(msrs);
+
+	/* Restoring APIC_LVTPC can trigger an APIC error because the delivery
+	 * mode and vector nr combination can be illegal. That's by design: at
+	 * power-on the APIC LVT holds a zero vector nr, which is legal only for
+	 * NMI delivery mode. So inhibit the APIC error before restoring LVTPC.
+	 */
+	if (!(apic_read(APIC_LVTPC) & APIC_DM_NMI)
+		|| (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)) {
+		printk("nmi_stop: APIC not good %lu\n", (unsigned long)apic_read(APIC_LVTPC));
+		mdelay(5000);
+	}
+	v = apic_read(APIC_LVTERR);	/* remembered so it can be put back */
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+	apic_write(APIC_LVTERR, v);
+}
+ 
+void nmi_stop(void)
+{
+	nmi_profiling_started = 0;
+	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+	active_virq_count = 0;
+	passive_virq_count = 0;
+	other_virq_count = 0;
+	xen_count = 0;
+	dom_count = 0;
+}
+
+extern unsigned int read_ctr(struct op_msrs const * const msrs, int ctr);
+
+void nmi_sanity_check(struct cpu_user_regs *regs, int cpu)
+{
+	int i;
+	int masked = 0;
+
+	/* We may have missed some NMI interrupts if we were already 
+		in an NMI context at that time. If this happens, then 
+		the counters are not reset and in the case of P4, the 
+		APIC LVT disable mask is set. In both cases we end up 
+		losing samples. On P4, this condition can be detected 
+		by checking the APIC LVT mask. But in P6, we need to 
+		examine the counters for overflow. So, every timer 
+		interrupt, we check that everything is OK */
+
+	if (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)
+		masked = 1;
+
+	nmi_callback(regs, cpu);
+
+	if (ovf && masked) {
+		if (is_active(current->domain))
+			current->domain->shared_info->nmi_restarts++;
+		else if (is_passive(current->domain))
+			primary_profiler->shared_info->nmi_restarts++;
+	}
+
+	/*if (jiffies %1000 == 0) {	
+		printk("cpu %d: sample count %d %d %d at %u\n", cpu, active_virq_count, passive_virq_count, other_virq_count, jiffies);
+		printk("other task id %d\n", other_id);
+		printk("%d in xen, %d in domain\n", xen_count, dom_count);
+		printk("counters %p %p\n", read_ctr(&cpu_msrs[cpu], 0), read_ctr(&cpu_msrs[cpu], 1));
+	}*/
+	
+
+	for (i = 0; i < adomains; i++)
+		if (test_and_clear_bit(i, &virq_ovf_pending[0])) {
+		  /* For now we do not support profiling of SMP guests */
+                  /* virq is delivered to first VCPU */  
+		  send_guest_virq(adomain_ptrs[i]->vcpu[0], VIRQ_PMC_OVF);
+		}
+}
+
+void nmi_disable_virq(void)
+{
+	unset_nmi_callback();
+} 
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrls; ++i) {
+		wrmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrs; ++i) {
+		wrmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+}
+ 
+static void nmi_cpu_shutdown(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	nmi_restore_registers(msrs);
+}
+ 
+void nmi_release_counters(void)
+{
+	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+	release_lapic_nmi();
+	free_msrs();
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int p4_init(void)
+{
+	/* Not __init: reached from nmi_init(), which is not an __init function. */
+	__u8 cpu_model = current_cpu_data.x86_model;
+	printk("cpu model: %d\n", cpu_model);
+	if (cpu_model > 4)
+		return 0;
+
+#ifndef CONFIG_SMP
+	printk("model is op_p4_spec (uniprocessor)\n");
+	model = &op_p4_spec;
+	return 1;
+#else
+	/* Report the HT model only when cpu_has_ht actually selects it. */
+	if (cpu_has_ht)
+	{
+	  printk("model is op_p4_ht2_spec (SMP)\n");
+	  model = &op_p4_ht2_spec;
+	  return 1;
+	}
+	else
+	{
+	  printk("model is op_p4_spec (SMP)\n");
+	  model = &op_p4_spec;
+	  return 1;
+	}
+#endif
+}
+
+
+static int ppro_init(void)
+{
+	/* Not __init: reached from nmi_init(), which is not an __init function. */
+	__u8 cpu_model = current_cpu_data.x86_model;
+	if (cpu_model > 0xd)
+		return 0;
+
+	model = &op_ppro_spec;
+	return 1;
+}
+
+int nmi_init(int *num_events, int *is_primary)
+{
+	__u8 vendor = current_cpu_data.x86_vendor;
+	__u8 family = current_cpu_data.x86;
+	int prim = 0;		/* 1 only when this call claims primary_profiler */
+	int supported = 0;	/* set by the model init that accepts this CPU */
+
+	if (!cpu_has_apic) {
+		printk("(XEN) cpu has no APIC\n");
+		return -ENODEV;
+	}
+
+	if (primary_profiler == NULL) {
+		primary_profiler = current->domain;
+		prim = 1;
+	}
+
+	if (primary_profiler != current->domain)
+		goto out;	/* only the primary profiler picks the model */
+
+	printk("cpu vendor: %d\n", vendor);
+	printk("cpu family: %d\n", family);
+
+	if (vendor == X86_VENDOR_INTEL) {
+		switch (family) {
+		case 0xf:	/* Pentium IV */
+			supported = p4_init();
+			break;
+		case 6:		/* A P6-class processor */
+			supported = ppro_init();
+			break;
+		default:
+			break;
+		}
+	}
+	if (!supported) {
+		/* Drop the claim made above: leaving primary_profiler set
+		 * while model is still NULL would send a later caller to
+		 * "out", where model is dereferenced. */
+		primary_profiler = NULL;
+		return -ENODEV;
+	}
+out:
+	if (copy_to_user((void *)num_events, (void *)&model->num_counters, sizeof(int)))
+		return -EFAULT;
+	if (copy_to_user((void *)is_primary, (void *)&prim, sizeof(int)))
+		return -EFAULT;
+
+	return 0;
+}
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h ./xen/arch/x86/oprofile/op_counter.h
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h	1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_counter.h	2005-08-18 20:28:44 -05:00
@@ -0,0 +1,33 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c ./xen/arch/x86/oprofile/op_model_p4.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c	1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_model_p4.c	2005-08-19 22:25:07 -05:00
@@ -0,0 +1,748 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/* this has to be checked dynamically since the
+   hyper-threadedness of a chip is discovered at
+   kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+	if (cpu_has_ht)
+		num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+static inline int addr_increment(void)
+{
+#ifndef CONFIG_SMP
+	return 1;
+#else
+	return cpu_has_ht ? 2 : 1;	/* step of 2 when cpu_has_ht is set */
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+	int virt_counter;
+	int counter_address;
+	int cccr_address;
+};
+
+struct p4_event_binding {
+	int escr_select;  /* value to put in CCCR */
+	int event_select; /* value to put in ESCR */
+	struct {
+		int virt_counter; /* for this counter... */
+		int escr_address; /* use this ESCR       */
+	} bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+   event/i386.p4*events. */
+
+
+#define CTR_BPU_0      (1 << 0)
+#define CTR_MS_0       (1 << 1)
+#define CTR_FLAME_0    (1 << 2)
+#define CTR_IQ_4       (1 << 3)
+#define CTR_BPU_2      (1 << 4)
+#define CTR_MS_2       (1 << 5)
+#define CTR_FLAME_2    (1 << 6)
+#define CTR_IQ_5       (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
+	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
+	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
+	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
+	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
+	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS	(NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
+
+/* All cccr we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
+	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
+	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
+	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
+	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+	
+	{ /* BRANCH_RETIRED */
+		0x05, 0x06, 
+		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+	
+	{ /* MISPRED_BRANCH_RETIRED */
+		0x04, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+	
+	{ /* TC_DELIVER_MODE */
+		0x01, 0x01,
+		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
+		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
+	},
+	
+	{ /* BPU_FETCH_REQUEST */
+		0x00, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+	},
+
+	{ /* ITLB_REFERENCE */
+		0x03, 0x18,
+		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+	},
+
+	{ /* MEMORY_CANCEL */
+		0x05, 0x02,
+		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+	},
+
+	{ /* MEMORY_COMPLETE */
+		0x02, 0x08,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* LOAD_PORT_REPLAY */
+		0x02, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* STORE_PORT_REPLAY */
+		0x02, 0x05,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* MOB_LOAD_REPLAY */
+		0x02, 0x03,
+		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+	},
+
+	{ /* PAGE_WALK_TYPE */
+		0x04, 0x01,
+		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+	},
+
+	{ /* BSQ_CACHE_REFERENCE */
+		0x07, 0x0c, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+	},
+
+	{ /* IOQ_ALLOCATION */
+		0x06, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* IOQ_ACTIVE_ENTRIES */
+		0x06, 0x1a, 
+		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+		  { 0, 0 } }
+	},
+
+	{ /* FSB_DATA_ACTIVITY */
+		0x06, 0x17, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+
+	{ /* BSQ_ALLOCATION */
+		0x07, 0x05, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* BSQ_ACTIVE_ENTRIES */
+		0x07, 0x06,
+		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+		  { 0, 0 } }
+	},
+
+	{ /* X87_ASSIST */
+		0x05, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* SSE_INPUT_ASSIST */
+		0x01, 0x34,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_SP_UOP */
+		0x01, 0x08, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_DP_UOP */
+		0x01, 0x0c, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_SP_UOP */
+		0x01, 0x0a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_DP_UOP */
+		0x01, 0x0e,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* 64BIT_MMX_UOP */
+		0x01, 0x02, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* 128BIT_MMX_UOP */
+		0x01, 0x1a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* X87_FP_UOP */
+		0x01, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* X87_SIMD_MOVES_UOP */
+		0x01, 0x2e, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* MACHINE_CLEAR */
+		0x05, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* GLOBAL_POWER_EVENTS */
+		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+  
+	{ /* TC_MS_XFER */
+		0x00, 0x05, 
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* UOP_QUEUE_WRITES */
+		0x00, 0x09,
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* FRONT_END_EVENT */
+		0x05, 0x08,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* EXECUTION_EVENT */
+		0x05, 0x0c,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* REPLAY_EVENT */
+		0x05, 0x09,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* INSTR_RETIRED */
+		0x04, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOPS_RETIRED */
+		0x04, 0x01,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOP_TYPE */    
+		0x02, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+	},
+
+	{ /* RETIRED_MISPRED_BRANCH_TYPE */
+		0x02, 0x05, 
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	},
+
+	{ /* RETIRED_BRANCH_TYPE */
+		0x02, 0x04,
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	}
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+   the code in this module as an extra array offset, to select the "even"
+   or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+	/*int cpu = smp_processor_id();
+	return (cpu != first_cpu(cpu_sibling_map[cpu]));*/
+	/* We want the two logical cpus of a physical cpu to use disjoint sets
+	of counters. NOTE(review): returning 0 for every CPU does not do that. */
+	return 0;
+#endif	
+	return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+   by passing a "virtual" counter number to this macro,
+   along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+	unsigned int i; 
+	unsigned int addr, stag;
+
+	setup_num_counters();
+	stag = get_stagger();
+
+	/* the counter registers we pay attention to */
+	for (i = 0; i < num_counters; ++i) {
+		msrs->counters[i].addr = 
+			p4_counters[VIRT_CTR(stag, i)].counter_address;
+	}
+
+	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
+
+	/* 18 CCCR registers */
+	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	/* 43 ESCR registers in three or four discontiguous groups */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+
+	/* no IQ_ESCR0/1 on some models, so we save BSU_ESCR0/1 a second time
+	 * to avoid special case in nmi_{save|restore}_registers() */
+	if (boot_cpu_data.x86_model >= 0x3) {
+		for (addr = MSR_P4_BSU_ESCR0 + stag;
+		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	} else {
+		for (addr = MSR_P4_IQ_ESCR0 + stag;
+		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+
+	/* there are 2 remaining non-contiguously located ESCRs */
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		/* standard non-HT CPUs handle both remaining ESCRs*/
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else if (stag == 0) {
+		/* HT CPUs give the first remainder to the even thread, as
+		   the 32nd control register */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else {
+		/* and two copies of the second to the odd thread,
+		   for the 22nd and 23rd control registers */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+	}
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+	int i;
+	int const maxbind = 2;
+	unsigned int cccr = 0;
+	unsigned int escr = 0;
+	unsigned int high = 0;
+	unsigned int counter_bit;
+	struct p4_event_binding *ev = NULL;
+	unsigned int stag;
+
+	stag = get_stagger();
+	
+	/* convert from counter *number* to counter *bit* */
+	counter_bit = 1 << VIRT_CTR(stag, ctr);
+	
+	/* find our event binding structure. */
+	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
+		printk(KERN_ERR 
+		       "oprofile: P4 event code 0x%lx out of range\n", 
+		       counter_config[ctr].event);
+		return;
+	}
+	
+	ev = &(p4_events[counter_config[ctr].event - 1]);
+	
+	for (i = 0; i < maxbind; i++) {
+		if (ev->bindings[i].virt_counter & counter_bit) {
+
+			/* modify ESCR */
+			ESCR_READ(escr, high, ev, i);
+			ESCR_CLEAR(escr);
+			if (stag == 0) {
+				ESCR_SET_USR_0(escr, counter_config[ctr].user);
+				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+			} else {
+				ESCR_SET_USR_1(escr, counter_config[ctr].user);
+				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+			}
+			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);			
+			ESCR_WRITE(escr, high, ev, i);
+		       
+			/* modify CCCR */
+			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+			CCCR_CLEAR(cccr);
+			CCCR_SET_REQUIRED_BITS(cccr);
+			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+			if (stag == 0) {
+				CCCR_SET_PMI_OVF_0(cccr);
+			} else {
+				CCCR_SET_PMI_OVF_1(cccr);
+			}
+			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+			return;
+		}
+	}
+
+	printk(KERN_ERR 
+	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+	       counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int i;
+	unsigned int low, high;
+	unsigned int addr;
+	unsigned int stag;
+
+	stag = get_stagger();
+
+	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+	if (! MISC_PMC_ENABLED_P(low)) {
+		printk(KERN_ERR "oprofile: P4 PMC not available\n");
+		return;
+	}
+
+	/* clear the cccrs we will use */
+	for (i = 0 ; i < num_counters ; i++) {
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+	}
+
+	/* clear cccrs outside our concern */
+	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+		rdmsr(p4_unused_cccr[i], low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_unused_cccr[i], low, high);
+	}
+
+	/* clear all escrs (including those outside our concern) */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+
+	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
+	if (boot_cpu_data.x86_model < 0x3) {
+		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+	
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
+		wrmsr(addr, 0, 0);
+	}
+	
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
+		wrmsr(addr, 0, 0);
+	}
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	} else if (stag == 0) {
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+	} else {
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	}		
+	
+	/* setup all counters */
+	for (i = 0 ; i < num_counters ; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+			pmc_setup_one_p4_counter(i);
+			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+		} else {
+			reset_value[i] = 0;
+		}
+	}
+}
+
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+static int p4_check_ctrs(unsigned int const cpu, 
+			  struct op_msrs const * const msrs,
+			  struct cpu_user_regs * const regs)
+{
+	unsigned long ctr, low, high, stag, real;
+	int i, ovf = 0;
+	u64 eip = regs->eip;
+	int mode = 0;
+	struct vcpu *v = current;
+
+	//if (RING_1(regs))
+	if (KERNEL_MODE(v, regs))
+		mode = 1;
+	else if (RING_0(regs))
+		mode = 2;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i]) 
+			continue;
+
+		/* 
+		 * there is some eccentricity in the hardware which
+		 * requires that we perform 2 extra corrections:
+		 *
+		 * - check both the CCCR:OVF flag for overflow and the
+		 *   counter high bit for un-flagged overflows.
+		 *
+		 * - write the counter back twice to ensure it gets
+		 *   updated properly.
+		 * 
+		 * the former seems to be related to extra NMIs happening
+		 * during the current NMI; the latter is reported as errata
+		 * N15 in intel doc 249199-029, pentium 4 specification
+		 * update, though their suggested work-around does not
+		 * appear to solve the problem.
+		 */
+		
+		real = VIRT_CTR(stag, i);
+
+		CCCR_READ(low, high, real);
+ 		CTR_READ(ctr, high, real);
+		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+			pmc_log_event(current->domain, eip, mode, i);
+			CTR_WRITE(reset_value[i], real);
+			CCCR_CLEAR_OVF(low);
+			CCCR_WRITE(low, high, real);
+			CTR_WRITE(reset_value[i], real);
+			ovf = 1;
+		}
+	}
+
+	/* P4 quirk: you have to re-unmask the apic vector */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* See op_model_ppro.c */
+	return ovf;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+	unsigned int low, high, stag;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i])
+			continue;
+		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		CCCR_SET_ENABLE(low);
+		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+	}
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low, high, stag;
+	int i;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		CCCR_READ(low, high, VIRT_CTR(stag, i));
+		CCCR_SET_DISABLE(low);
+		CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+	}
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+	.num_counters = NUM_COUNTERS_HT2,
+	.num_controls = NUM_CONTROLS_HT2,
+	.fill_in_addresses = &p4_fill_in_addresses,
+	.setup_ctrs = &p4_setup_ctrs,
+	.check_ctrs = &p4_check_ctrs,
+	.start = &p4_start,
+	.stop = &p4_stop
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+	.num_counters = NUM_COUNTERS_NON_HT,
+	.num_controls = NUM_CONTROLS_NON_HT,
+	.fill_in_addresses = &p4_fill_in_addresses,
+	.setup_ctrs = &p4_setup_ctrs,
+	.check_ctrs = &p4_check_ctrs,
+	.start = &p4_start,
+	.stop = &p4_stop
+};
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c ./xen/arch/x86/oprofile/op_model_ppro.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c	1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_model_ppro.c	2005-08-19 20:36:40 -05:00
@@ -0,0 +1,168 @@
+/**
+ * @file op_model_ppro.c
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr((msrs)->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr((msrs)->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+/* CTR_* act on msrs->counters[c]; CTRL_* on msrs->controls[c] or a register image */
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(((msrs)->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(((msrs)->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) ((n) |= (1<<22))
+#define CTRL_SET_INACTIVE(n) ((n) &= ~(1<<22))
+#define CTRL_CLEAR(x) ((x) &= (1<<21))
+#define CTRL_SET_ENABLE(val) ((val) |= 1<<20)
+#define CTRL_SET_USR(val,u) ((val) |= (((u) & 1) << 16))
+#define CTRL_SET_KERN(val,k) ((val) |= (((k) & 1) << 17))
+#define CTRL_SET_UM(val, m) ((val) |= ((m) << 8))
+#define CTRL_SET_EVENT(val, e) ((val) |= (e))
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+	msrs->counters[0].addr = MSR_P6_PERFCTR0;
+	msrs->counters[1].addr = MSR_P6_PERFCTR1;
+	
+	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int low, high;
+	int i;
+
+	/* clear all counters */
+	for (i = 0 ; i < NUM_CONTROLS; ++i) {
+		CTRL_READ(low, high, msrs, i);
+		CTRL_CLEAR(low);
+		CTRL_WRITE(low, high, msrs, i);
+	}
+	
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		CTR_WRITE(1, msrs, i);
+	}
+
+	/* enable active counters */
+	for (i = 0; i < NUM_COUNTERS; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+
+			CTR_WRITE(counter_config[i].count, msrs, i);
+
+			CTRL_READ(low, high, msrs, i);
+			CTRL_CLEAR(low);
+			CTRL_SET_ENABLE(low);
+			CTRL_SET_USR(low, counter_config[i].user);
+			CTRL_SET_KERN(low, counter_config[i].kernel);
+			CTRL_SET_UM(low, counter_config[i].unit_mask);
+			CTRL_SET_EVENT(low, counter_config[i].event);
+			CTRL_WRITE(low, high, msrs, i);
+		}
+	}
+}
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+/* Log a sample for each overflowed P6 counter and re-arm it.
+ * Returns 1 if any counter had overflowed, 0 otherwise. */
+static int ppro_check_ctrs(unsigned int const cpu, 
+			    struct op_msrs const * const msrs,
+			    struct cpu_user_regs * const regs)
+{
+	unsigned int low, high;
+	int i, ovf = 0;
+	u64 eip = regs->eip;
+	int mode = 0;
+
+	/* RING_1() never matches an x86_64 guest kernel, so use KERNEL_MODE()
+	 * like p4_check_ctrs(). RING_0() first in case that ignores regs. */
+	if (RING_0(regs))
+		mode = 2;
+	else if (KERNEL_MODE(current, regs))
+		mode = 1;
+
+	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		CTR_READ(low, high, msrs, i);
+		if (CTR_OVERFLOWED(low)) {
+			pmc_log_event(current->domain, eip, mode, i);
+			CTR_WRITE(reset_value[i], msrs, i);
+			ovf = 1;
+		}
+	}
+
+	/* Only P6 based Pentium M need to re-unmask the apic vector but it
+	 * doesn't hurt other P6 variant */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* Unlike the Linux original, which always claims the NMI, report
+	 * whether an overflow was actually found. A late-arriving NMI for a
+	 * counter already handled above is therefore reported as 0. */
+	return ovf;
+}
+
+ 
+static void ppro_start(struct op_msrs const * const msrs)
+{
+	unsigned int low,high;
+	CTRL_READ(low, high, msrs, 0);
+	CTRL_SET_ACTIVE(low);
+	CTRL_WRITE(low, high, msrs, 0);
+}
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+	unsigned int low,high;
+	CTRL_READ(low, high, msrs, 0);
+	CTRL_SET_INACTIVE(low);
+	CTRL_WRITE(low, high, msrs, 0);
+}
+
+unsigned int read_ctr(struct op_msrs const * const msrs, int i)
+{
+	unsigned int low, high;
+	CTR_READ(low, high, msrs, i);
+	return low;
+}
+
+struct op_x86_model_spec const op_ppro_spec = {
+	.num_counters = NUM_COUNTERS,
+	.num_controls = NUM_CONTROLS,
+	.fill_in_addresses = &ppro_fill_in_addresses,
+	.setup_ctrs = &ppro_setup_ctrs,
+	.check_ctrs = &ppro_check_ctrs,
+	.start = &ppro_start,
+	.stop = &ppro_stop
+};
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h ./xen/arch/x86/oprofile/op_x86_model.h
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h	1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_x86_model.h	2005-08-18 20:28:44 -05:00
@@ -0,0 +1,55 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H

+
+struct op_saved_msr {
+	unsigned int high;
+	unsigned int low;
+};
+
+struct op_msr {
+	unsigned long addr;
+	struct op_saved_msr saved;
+};
+
+struct op_msrs {
+	struct op_msr * counters;
+	struct op_msr * controls;
+};
+
+struct cpu_user_regs;	/* the register frame type check_ctrs() takes */
+
+/* The model vtable abstracts the differences between various x86 CPU
+ * models' perfctr support. */
+struct op_x86_model_spec {
+	unsigned int const num_counters;
+	unsigned int const num_controls;
+	void (*fill_in_addresses)(struct op_msrs * const msrs);
+	void (*setup_ctrs)(struct op_msrs const * const msrs);
+	/* nonzero when an overflowed counter was found and handled */
+	int (*check_ctrs)(unsigned int const cpu, 
+		struct op_msrs const * const msrs,
+		struct cpu_user_regs * const regs);
+	void (*start)(struct op_msrs const * const msrs);
+	void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c ./xen/arch/x86/oprofile/pmc.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c	1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/pmc.c	2005-08-19 20:34:32 -05:00
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon, email: xenoprof@groups.hp.com
+ */
+
+#include <xen/sched.h>
+#include <asm/current.h>
+
+#include "op_counter.h"
+
+int active_domains[MAX_OPROF_DOMAINS];
+int passive_domains[MAX_OPROF_DOMAINS];
+unsigned int adomains = 0;
+unsigned int pdomains = 0;
+unsigned int activated = 0;
+
+#define VIRQ_BITMASK_SIZE	(MAX_OPROF_DOMAINS/32 + 1)
+
+struct domain * primary_profiler = NULL;
+struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+unsigned int virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+int is_active(struct domain *d) 
+{
+	int i;
+	for (i = 0; i < adomains; i++)
+		if (d->domain_id == active_domains[i])
+			return 1;
+	return 0;
+}
+
+int active_id(struct domain *d)
+{
+	int i;
+	for (i = 0; i < adomains; i++)
+		if (d == adomain_ptrs[i])
+			return i;
+	return -1;
+}
+
+/* put_domain() every tracked active domain; clear the set and pending bits. */
+void free_adomain_ptrs(void)
+{
+	int i;
+	int num = adomains;
+
+	adomains = 0;
+	for (i = 0; i < VIRQ_BITMASK_SIZE; i++)
+		virq_ovf_pending[i] = 0;
+	for (i = 0; i < num; i++) {
+		put_domain(adomain_ptrs[i]);
+		adomain_ptrs[i] = NULL;
+	}
+}
+
+int set_adomain_ptrs(int num)
+{
+	int i;
+	struct domain *d;
+
+	for (i = 0; i < VIRQ_BITMASK_SIZE; i++)
+		virq_ovf_pending[i] = 0;
+
+	for (i = 0; i < num; i++) {
+		d = find_domain_by_id(active_domains[i]);
+		if (!d) {
+			free_adomain_ptrs();
+			return -EFAULT;
+		}
+		adomain_ptrs[i] = d;
+		adomains++;
+	}
+	return 0;
+}
+
+int set_active(struct domain *d)
+{
+	if (is_active(d))
+		return 0;
+	/* hack if we run out of space */
+	if (adomains >= MAX_OPROF_DOMAINS) {
+		adomains--;
+		put_domain(adomain_ptrs[adomains]);
+	}
+	active_domains[adomains] = d->domain_id;
+	if (get_domain(d))
+		adomain_ptrs[adomains++] = d;
+	else {
+		free_adomain_ptrs();
+		return -EFAULT;
+	}
+	return 0;
+}
+
+int is_passive(struct domain *d)
+{
+	int i;
+	for (i = 0; i < pdomains; i++)
+		if (d->domain_id == passive_domains[i])
+			return 1;
+	return 0;
+}
+
+int is_profiled(struct domain *d)
+{
+	if (is_active(d) || is_passive(d))
+		return 1;
+	return 0;
+}
+
+/* Record one sample (eip, mode, event) taken while domain d was running.
+ * A non-passive profiled domain is logged to its own shared_info ring; a
+ * passive one to the primary profiler's ring, behind a marker record
+ * (eip ~1, mode ~0, event = domain id). A full ring counts the sample
+ * in samples_lost instead. */
+void pmc_log_event(struct domain *d, u64 eip, int mode, int event) 
+{
+	shared_info_t *s = NULL;
+	struct domain *dest = d;
+	int head, tail;
+
+	if (!is_profiled(d))
+		return;
+
+	if (!is_passive(d)) {
+		s = dest->shared_info;
+		head = s->event_head;
+		tail = s->event_tail;
+		/* guest-writable index: keep it inside event_log[] */
+		if (head >= MAX_OPROF_EVENTS)
+			head = 0;
+		if ((head == tail - 1) || 
+		    (head == MAX_OPROF_EVENTS - 1 && tail == 0)) {
+			s->losing_samples = 1;
+			s->samples_lost++;
+		} else {
+			s->event_log[head].eip = eip;
+			s->event_log[head].mode = mode;
+			s->event_log[head].event = event;
+			head++;
+			if (head >= MAX_OPROF_EVENTS)
+				head = 0;
+			s->event_head = head;
+		}
+	} else {	/* passive domains */
+		dest = primary_profiler;
+		if (dest == NULL)	/* no primary profiler to log to */
+			return;
+		s = dest->shared_info;
+		tail = s->event_tail;
+		head = s->event_head;
+		if (head >= MAX_OPROF_EVENTS)
+			head = 0;
+
+		/* Inefficient format: each sample from another domain is
+		   preceded by a special record naming that domain, so at
+		   least two entries must be empty in the buffer */
+		if ((head == tail - 1) || 
+		    (head == tail - 2) ||
+		    (head == MAX_OPROF_EVENTS - 1 && tail <= 1) ||
+		    (head == MAX_OPROF_EVENTS - 2 && tail == 0) ) {
+			s->losing_samples = 1;
+			s->samples_lost++;
+		} else {
+			s->event_log[head].eip = ~1;
+			s->event_log[head].mode = ~0;
+			s->event_log[head].event = d->domain_id;
+			head++;
+			if (head >= MAX_OPROF_EVENTS)
+				head = 0;
+			s->event_log[head].eip = eip;
+			s->event_log[head].mode = mode;
+			s->event_log[head].event = event;
+			head++;
+			if (head >= MAX_OPROF_EVENTS)
+				head = 0;
+			s->event_head = head;
+		}
+	}
+}
+
+static void pmc_event_init(struct domain *d)
+{
+	shared_info_t *s = d->shared_info;
+	s->event_head = 0;
+	s->event_tail = 0;
+	s->losing_samples = 0;
+	s->samples_lost = 0;
+	s->nmi_restarts = 0;
+	s->active_samples = 0;
+	s->passive_samples = 0;
+	s->other_samples = 0;
+}
+
+extern int nmi_init(int *num_events, int *is_primary);
+extern int nmi_reserve_counters(void);
+extern int nmi_setup_events(void);
+extern int nmi_enable_virq(void);
+extern int nmi_start(void);
+extern void nmi_stop(void);
+extern void nmi_disable_virq(void);
+extern void nmi_release_counters(void);
+
+#define PRIV_OP(op)	((op == PMC_SET_ACTIVE) || (op == PMC_SET_PASSIVE) || (op == PMC_RESERVE_COUNTERS) \
+			|| (op == PMC_SETUP_EVENTS) || (op == PMC_START) || (op == PMC_STOP) \
+			|| (op == PMC_RELEASE_COUNTERS) || (op == PMC_SHUTDOWN))
+
+/* PMC hypercall dispatcher: op is a PMC_* command, arg1/arg2 its operands
+ * (a guest pointer and an element count for PMC_SET_* / PMC_SETUP_EVENTS).
+ * PRIV_OP() commands are reserved for the primary profiler domain. Returns
+ * 0, a negative error code, or whatever the nmi_*() helper returned. */
+int do_pmc_op(int op, u64 arg1, u64 arg2)
+{
+	int ret = 0;
+
+	if (PRIV_OP(op) && current->domain != primary_profiler)
+		return -EPERM;
+
+	switch (op) {
+		case PMC_INIT:
+			printk("PMC_INIT]\n");
+			ret = nmi_init((int *)arg1, (int *)arg2);
+			printk("nmi_init returned %d\n", ret);
+			break;
+		case PMC_SET_ACTIVE:
+			printk("PMC_SETACTIVE]\n");
+			if (adomains != 0)
+				return -EPERM;
+			if (arg2 > MAX_OPROF_DOMAINS)	/* guest-supplied count */
+				return -EINVAL;
+			if (copy_from_user((void *)&active_domains,
+				(void *)arg1, arg2*sizeof(int)))
+				return -EFAULT;
+			if (set_adomain_ptrs(arg2))
+				return -EFAULT;
+			if (set_active(current->domain))
+				return -EFAULT;
+			break;
+		case PMC_SET_PASSIVE:
+			printk("PMC_SETPASSIVE\n");
+			if (pdomains != 0)
+				return -EPERM;
+			if (arg2 > MAX_OPROF_DOMAINS)	/* guest-supplied count */
+				return -EINVAL;
+			if (copy_from_user((void *)&passive_domains,
+				(void *)arg1, arg2*sizeof(int)))
+				return -EFAULT;
+			pdomains = arg2;
+			break;
+		case PMC_RESERVE_COUNTERS:
+			printk("PMC_RESERVE_COUNTERS\n");
+			ret = nmi_reserve_counters();
+			break;
+		case PMC_SETUP_EVENTS:
+			printk("PMV_SETUP_EVENTS\n");
+			if (arg2 > OP_MAX_COUNTER)	/* guest-supplied count */
+				return -EINVAL;
+			if (copy_from_user((void *)&counter_config, 
+				(void *)arg1, arg2*sizeof(struct op_counter_config)))
+				return -EFAULT;
+			ret = nmi_setup_events();
+			break;
+		case PMC_ENABLE_VIRQ:
+			printk("PMC_ENABLE_VIRQ\n");
+			if (!is_active(current->domain)) {
+				if (current->domain != primary_profiler)
+					return -EPERM;
+				if (set_active(current->domain))
+					return -EFAULT;
+			}
+			ret = nmi_enable_virq();
+			pmc_event_init(current->domain);
+			activated++;
+			break;
+		case PMC_START:
+			printk("PMC_START\n");
+			if (activated < adomains)
+				return -EPERM;
+			ret = nmi_start();
+			break;
+		case PMC_STOP:
+			printk("PMC_STOP\n");
+			nmi_stop();
+			break;
+		case PMC_DISABLE_VIRQ:
+			printk("PMC_DISBALE_VIRQ\n");
+			if (!is_active(current->domain))
+				return -EPERM;
+			nmi_disable_virq();
+			if (activated > 0)	/* unsigned: do not wrap */
+				activated--;
+			break;
+		case PMC_RELEASE_COUNTERS:
+			printk("PMC_RELEASE_COUNTERS\n");
+			nmi_release_counters();
+			break;
+		case PMC_SHUTDOWN:
+			printk("PMC_SHUTDOWN\n");
+			free_adomain_ptrs();
+			pdomains = 0;
+			activated = 0;
+			primary_profiler = NULL;
+			break;
+		default:
+			ret = -EINVAL;
+	}
+	return ret;
+}
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/traps.c ./xen/arch/x86/traps.c
--- ../xen-unstable.hg-6251/xen/arch/x86/traps.c	2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/traps.c	2005-08-18 20:28:44 -05:00
@@ -2,6 +2,10 @@
  * arch/x86/traps.c
  * 
  * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,6 +58,7 @@
 #include <asm/debugger.h>
 #include <asm/msr.h>
 #include <asm/x86_emulate.h>
+#include <asm/nmi.h>
 
 /*
  * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
@@ -1040,7 +1045,7 @@ static void unknown_nmi_error(unsigned c
     printk("Do you have a strange power saving mode enabled?\n");
 }
 
-asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason)
+static void default_do_nmi(struct cpu_user_regs * regs, unsigned long reason)
 {
     ++nmi_count(smp_processor_id());
 
@@ -1055,6 +1060,35 @@ asmlinkage void do_nmi(struct cpu_user_r
         unknown_nmi_error((unsigned char)(reason&0xff));
 }
 
+static int dummy_nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+        return 0;
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+asmlinkage void do_nmi(struct cpu_user_regs * regs, unsigned long reason)
+{
+    int cpu = smp_processor_id();
+
+    /* Default handling runs only if the callback does not claim the NMI. */
+    if (nmi_callback(regs, cpu) == 0)
+        default_do_nmi(regs, reason);
+}
+
+void set_nmi_callback(nmi_callback_t callback)
+{
+    nmi_callback = callback;
+}
+ 
+void unset_nmi_callback(void)
+{
+    nmi_callback = dummy_nmi_callback;
+}
+ 
+EXPORT_SYMBOL(set_nmi_callback);
+EXPORT_SYMBOL(unset_nmi_callback);
+ 
 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
 {
     /* Prevent recursion. */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S ./xen/arch/x86/x86_32/entry.S
--- ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S	2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/x86_32/entry.S	2005-08-18 20:28:44 -05:00
@@ -763,7 +763,8 @@ ENTRY(hypercall_table)
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
-        .long do_acm_op             /* 27 */
+        .long do_acm_op
+	.long do_pmc_op             /* 28 */
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S ./xen/arch/x86/x86_64/entry.S
--- ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S	2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/x86_64/entry.S	2005-08-18 20:37:21 -05:00
@@ -593,6 +593,7 @@ ENTRY(hypercall_table)
         .quad do_set_segment_base   /* 25 */
         .quad do_mmuext_op
         .quad do_acm_op
+	.quad do_pmc_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .quad do_ni_hypercall
         .endr
diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h ./xen/include/asm-x86/msr.h
--- ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h	2005-08-19 23:46:23 -05:00
+++ ./xen/include/asm-x86/msr.h	2005-08-18 20:28:44 -05:00
@@ -195,6 +195,89 @@
 #define MSR_P6_EVNTSEL0			0x186
 #define MSR_P6_EVNTSEL1			0x187
 
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0 		0x300
+#define MSR_P4_BPU_PERFCTR1 		0x301
+#define MSR_P4_BPU_PERFCTR2 		0x302
+#define MSR_P4_BPU_PERFCTR3 		0x303
+#define MSR_P4_MS_PERFCTR0 		0x304
+#define MSR_P4_MS_PERFCTR1 		0x305
+#define MSR_P4_MS_PERFCTR2 		0x306
+#define MSR_P4_MS_PERFCTR3 		0x307
+#define MSR_P4_FLAME_PERFCTR0 		0x308
+#define MSR_P4_FLAME_PERFCTR1 		0x309
+#define MSR_P4_FLAME_PERFCTR2 		0x30a
+#define MSR_P4_FLAME_PERFCTR3 		0x30b
+#define MSR_P4_IQ_PERFCTR0 		0x30c
+#define MSR_P4_IQ_PERFCTR1 		0x30d
+#define MSR_P4_IQ_PERFCTR2 		0x30e
+#define MSR_P4_IQ_PERFCTR3 		0x30f
+#define MSR_P4_IQ_PERFCTR4 		0x310
+#define MSR_P4_IQ_PERFCTR5 		0x311
+#define MSR_P4_BPU_CCCR0 		0x360
+#define MSR_P4_BPU_CCCR1 		0x361
+#define MSR_P4_BPU_CCCR2 		0x362
+#define MSR_P4_BPU_CCCR3 		0x363
+#define MSR_P4_MS_CCCR0 		0x364
+#define MSR_P4_MS_CCCR1 		0x365
+#define MSR_P4_MS_CCCR2 		0x366
+#define MSR_P4_MS_CCCR3 		0x367
+#define MSR_P4_FLAME_CCCR0 		0x368
+#define MSR_P4_FLAME_CCCR1 		0x369
+#define MSR_P4_FLAME_CCCR2 		0x36a
+#define MSR_P4_FLAME_CCCR3 		0x36b
+#define MSR_P4_IQ_CCCR0 		0x36c
+#define MSR_P4_IQ_CCCR1 		0x36d
+#define MSR_P4_IQ_CCCR2 		0x36e
+#define MSR_P4_IQ_CCCR3 		0x36f
+#define MSR_P4_IQ_CCCR4 		0x370
+#define MSR_P4_IQ_CCCR5 		0x371
+#define MSR_P4_ALF_ESCR0 		0x3ca
+#define MSR_P4_ALF_ESCR1 		0x3cb
+#define MSR_P4_BPU_ESCR0 		0x3b2
+#define MSR_P4_BPU_ESCR1 		0x3b3
+#define MSR_P4_BSU_ESCR0 		0x3a0
+#define MSR_P4_BSU_ESCR1 		0x3a1
+#define MSR_P4_CRU_ESCR0 		0x3b8
+#define MSR_P4_CRU_ESCR1 		0x3b9
+#define MSR_P4_CRU_ESCR2 		0x3cc
+#define MSR_P4_CRU_ESCR3 		0x3cd
+#define MSR_P4_CRU_ESCR4 		0x3e0
+#define MSR_P4_CRU_ESCR5 		0x3e1
+#define MSR_P4_DAC_ESCR0 		0x3a8
+#define MSR_P4_DAC_ESCR1 		0x3a9
+#define MSR_P4_FIRM_ESCR0 		0x3a4
+#define MSR_P4_FIRM_ESCR1 		0x3a5
+#define MSR_P4_FLAME_ESCR0 		0x3a6
+#define MSR_P4_FLAME_ESCR1 		0x3a7
+#define MSR_P4_FSB_ESCR0 		0x3a2
+#define MSR_P4_FSB_ESCR1 		0x3a3
+#define MSR_P4_IQ_ESCR0 		0x3ba
+#define MSR_P4_IQ_ESCR1 		0x3bb
+#define MSR_P4_IS_ESCR0 		0x3b4
+#define MSR_P4_IS_ESCR1 		0x3b5
+#define MSR_P4_ITLB_ESCR0 		0x3b6
+#define MSR_P4_ITLB_ESCR1 		0x3b7
+#define MSR_P4_IX_ESCR0 		0x3c8
+#define MSR_P4_IX_ESCR1 		0x3c9
+#define MSR_P4_MOB_ESCR0 		0x3aa
+#define MSR_P4_MOB_ESCR1 		0x3ab
+#define MSR_P4_MS_ESCR0 		0x3c0
+#define MSR_P4_MS_ESCR1 		0x3c1
+#define MSR_P4_PMH_ESCR0 		0x3ac
+#define MSR_P4_PMH_ESCR1 		0x3ad
+#define MSR_P4_RAT_ESCR0 		0x3bc
+#define MSR_P4_RAT_ESCR1 		0x3bd
+#define MSR_P4_SAAT_ESCR0 		0x3ae
+#define MSR_P4_SAAT_ESCR1 		0x3af
+#define MSR_P4_SSU_ESCR0 		0x3be
+#define MSR_P4_SSU_ESCR1 		0x3bf    /* guess: not defined in manual */
+#define MSR_P4_TBPU_ESCR0 		0x3c2
+#define MSR_P4_TBPU_ESCR1 		0x3c3
+#define MSR_P4_TC_ESCR0 		0x3c4
+#define MSR_P4_TC_ESCR1 		0x3c5
+#define MSR_P4_U2L_ESCR0 		0x3b0
+#define MSR_P4_U2L_ESCR1 		0x3b1
 
 /* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
 #define MSR_K7_EVNTSEL0            0xC0010000
diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h ./xen/include/asm-x86/nmi.h
--- ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h	1969-12-31 18:00:00 -06:00
+++ ./xen/include/asm-x86/nmi.h	2005-08-18 20:28:44 -05:00
@@ -0,0 +1,26 @@
+/*
+ *  xen/include/asm-x86/nmi.h (derived from linux/include/asm-i386/nmi.h)
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+struct cpu_user_regs;
+ 
+typedef int (*nmi_callback_t)(struct cpu_user_regs * regs, int cpu);
+ 
+/** 
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set. Return 1 if the NMI was handled.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+ 
+/** 
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set.
+ */
+void unset_nmi_callback(void);
+ 
+#endif /* ASM_NMI_H */
diff -Naurp ../xen-unstable.hg-6251/xen/include/public/xen.h ./xen/include/public/xen.h
--- ../xen-unstable.hg-6251/xen/include/public/xen.h	2005-08-19 23:46:23 -05:00
+++ ./xen/include/public/xen.h	2005-08-19 20:34:10 -05:00
@@ -4,6 +4,10 @@
  * Guest OS interface to Xen.
  * 
  * Copyright (c) 2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef __XEN_PUBLIC_XEN_H__
@@ -59,6 +63,7 @@
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
 #define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_pmc_op               28
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -72,7 +77,8 @@
 #define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error.                    */
 #define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error.                       */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
-#define NR_VIRQS        7
+#define VIRQ_PMC_OVF	7  /* PMC Overflow */
+#define NR_VIRQS        8
 
 /*
  * MMU-UPDATE REQUESTS
@@ -239,6 +245,21 @@ struct mmuext_op {
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define PMC_INIT		0
+#define PMC_SET_ACTIVE		1
+#define PMC_SET_PASSIVE		2
+#define PMC_RESERVE_COUNTERS	3
+#define PMC_SETUP_EVENTS	4
+#define PMC_ENABLE_VIRQ		5
+#define PMC_START		6
+#define PMC_STOP		7
+#define PMC_DISABLE_VIRQ	8
+#define PMC_RELEASE_COUNTERS	9
+#define PMC_SHUTDOWN		10
+
 #ifndef __ASSEMBLY__
 
 typedef u16 domid_t;
@@ -291,6 +312,8 @@ typedef struct
 /* Event channel endpoints per domain. */
 #define NR_EVENT_CHANNELS 1024
 
+#define MAX_OPROF_EVENTS	32
+#define MAX_OPROF_DOMAINS	25	
 /*
  * Per-VCPU information goes here. This will be cleaned up more when Xen 
  * actually supports multi-VCPU guests.
@@ -406,6 +429,21 @@ typedef struct shared_info {
     u32 wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
 
     arch_shared_info_t arch;
+
+    /* Oprofile structures */
+    u8 event_head;
+    u8 event_tail;
+    struct {
+	u64 eip;
+	u8 mode;
+	u8 event;
+    } event_log[MAX_OPROF_EVENTS];
+    u8 losing_samples;
+    u64 samples_lost;
+    u32 nmi_restarts;
+    u64 active_samples;
+    u64 passive_samples;
+    u64 other_samples;
 
 } shared_info_t;
 


[-- Attachment #3: xenoprof-1.2-x86_64-linux.patch --]
[-- Type: text/plain, Size: 37710 bytes --]

diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig	2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig	2005-08-22 20:17:51 -05:00
@@ -200,4 +200,6 @@ source "crypto/Kconfig"
 
 source "lib/Kconfig"
 
+source "arch/xen/oprofile/Kconfig"
+
 source "arch/xen/Kconfig.debug"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile	2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile	2005-08-22 20:17:51 -05:00
@@ -32,6 +32,8 @@ ifneq ($(KBUILD_SRC),)
 	$(Q)ln -fsn ../include/asm-$(XENARCH) include2/asm
 endif
 
+drivers-$(CONFIG_OPROFILE)    += arch/xen/oprofile/
+
 include/.asm-ignore: include/asm
 	@rm -f include/.asm-ignore
 	@mv include/asm include/.asm-ignore
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32	2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32	2005-08-22 20:17:51 -05:00
@@ -79,6 +79,12 @@ CONFIG_OBSOLETE_MODPARM=y
 CONFIG_KMOD=y
 
 #
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
 # X86 Processor Configuration
 #
 CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32	2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32	2005-08-22 20:17:51 -05:00
@@ -76,6 +76,12 @@ CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
 
 #
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
 # X86 Processor Configuration
 #
 CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile	2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile	2005-08-22 20:17:51 -05:00
@@ -84,7 +84,6 @@ core-y					+= arch/xen/i386/kernel/ \
 drivers-$(CONFIG_MATH_EMULATION)	+= arch/i386/math-emu/
 drivers-$(CONFIG_PCI)			+= arch/xen/i386/pci/
 # must be linked after kernel/
-drivers-$(CONFIG_OPROFILE)		+= arch/i386/oprofile/
 drivers-$(CONFIG_PM)			+= arch/i386/power/
 
 # for clean
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c	2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c	2005-08-22 20:17:51 -05:00
@@ -44,11 +44,16 @@
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/evtchn.h>
 
+int virq_to_phys(int virq);
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 EXPORT_SYMBOL(force_evtchn_callback);
 EXPORT_SYMBOL(evtchn_do_upcall);
 EXPORT_SYMBOL(bind_evtchn_to_irq);
 EXPORT_SYMBOL(unbind_evtchn_from_irq);
+EXPORT_SYMBOL(virq_to_phys);
+EXPORT_SYMBOL(bind_virq_to_irq);
+EXPORT_SYMBOL(unbind_virq_from_irq);
 #endif
 
 /*
@@ -178,6 +183,15 @@ static int find_unbound_irq(void)
         panic("No available IRQ to bind to: increase NR_IRQS!\n");
 
     return irq;
+}
+
+int virq_to_phys(int virq)
+{
+	/* Look up @virq in the calling CPU's virq_to_irq table; returns -1 when @virq is out of range. */
+	int cpu = smp_processor_id();
+	if (virq < 0 || virq >= NR_VIRQS)	/* a negative index would read before the table */
+		return -1;
+	return per_cpu(virq_to_irq,cpu)[virq];
 }
 
 int bind_virq_to_irq(int virq)
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig	1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig	2005-08-22 20:17:51 -05:00
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+	  
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, including the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile	1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile	2005-08-22 20:17:51 -05:00
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o  \
+		timer_int.o )
+
+oprofile-y				:= $(DRIVER_OBJS) pmc.o
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h	1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h	2005-08-22 20:17:51 -05:00
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;		/* backs the per-counter oprofilefs file "count" */
+        unsigned long enabled;		/* backs the per-counter oprofilefs file "enabled" */
+        unsigned long event;		/* backs the per-counter oprofilefs file "event" */
+        unsigned long kernel;		/* backs the per-counter oprofilefs file "kernel" */
+        unsigned long user;		/* backs the per-counter oprofilefs file "user" */
+        unsigned long unit_mask;	/* backs the per-counter oprofilefs file "unit_mask" */
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c	1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c	2005-08-22 20:17:51 -05:00
@@ -0,0 +1,323 @@
+/**
+ * @file pmc.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+ 
+#include "op_counter.h"
+ 
+static int pmc_start(void);
+static void pmc_stop(void);
+
+/* 0 == registered but off, 1 == registered and on */
+static int pmc_enabled = 0;
+static int num_events = 0;
+static int is_primary = 0;
+
+#ifdef CONFIG_PM
+/* sysdev suspend callback: calls pmc_stop() when profiling is currently enabled; always returns 0. */
+static int pmc_suspend(struct sys_device *dev, u32 state)
+{
+	if (pmc_enabled == 1)	/* 1 is set by pmc_setup(), 0 by pmc_shutdown() */
+		pmc_stop();
+	return 0;
+}
+
+/* sysdev resume callback: calls pmc_start() when profiling is enabled; its status is dropped and 0 returned. */
+static int pmc_resume(struct sys_device *dev)
+{
+	if (pmc_enabled == 1)	/* 1 is set by pmc_setup(), 0 by pmc_shutdown() */
+		pmc_start();
+	return 0;
+}
+
+/* sysdev class named "oprofile"; its suspend/resume callbacks are pmc_suspend()/pmc_resume(). */
+static struct sysdev_class oprofile_sysclass = {
+	set_kset_name("oprofile"),
+	.resume		= pmc_resume,
+	.suspend	= pmc_suspend,
+};
+
+/* The one sysdev instance (id 0) that init_driverfs() registers under oprofile_sysclass. */
+static struct sys_device device_oprofile = {
+	.id	= 0,
+	.cls	= &oprofile_sysclass,
+};
+
+/* Register the oprofile sysdev class, then its device; returns the first non-zero status, else 0. */
+static int __init init_driverfs(void)
+{
+	int error = sysdev_class_register(&oprofile_sysclass);
+	if (error)
+		return error;
+	return sysdev_register(&device_oprofile);
+}
+
+/* Unregister the sysdev device and then its class -- the reverse order of init_driverfs(). */
+static void __exit exit_driverfs(void)
+{
+	sysdev_unregister(&device_oprofile);
+	sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+
+static irqreturn_t pmc_ovf_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+	int head, tail;
+	shared_info_t *s = HYPERVISOR_shared_info;
+	/* IRQ handler for VIRQ_PMC_OVF: snapshot the event_log indices from the shared-info page. */
+	head = s->event_head;
+	tail = s->event_tail;
+	/* Forward event_log[tail..head) to oprofile.  tail > head means the
+	 * pending entries wrap past the end of the array, so two passes are made.
+	 * Per the original author, samples from other domains take this path too. */
+	if (tail > head) {
+		while (tail < MAX_OPROF_EVENTS) {
+			oprofile_add_sample_xen(s->event_log[tail].eip, 
+				s->event_log[tail].mode, 
+				s->event_log[tail].event);
+			/* First pass: from tail up to the end of event_log.
+			 * oprofile_samples counts every forwarded entry;
+			 * pmc_stop() prints the running total. */
+			oprofile_samples++;
+			tail++;
+		}
+		tail = 0;
+	}
+	while (tail < head) {
+		oprofile_add_sample_xen(s->event_log[tail].eip, 
+			s->event_log[tail].mode, s->event_log[tail].event);
+		/* Second pass (the only one when nothing wrapped):
+		 * the entries below head, counted the same way as
+		 * in the first pass. */
+		oprofile_samples++;
+		tail++;
+	}
+	/* Write the consumed position back and clear losing_samples in the shared page. */
+	s->event_tail = tail;
+	s->losing_samples = 0;
+
+	return IRQ_HANDLED;
+}
+
+extern int virq_to_phys(int virq);
+
+static int pmc_setup(void)
+{
+	int ret;
+	/* oprofile "setup" hook: returns 0, or the request_irq() error code on failure. */
+	/* Bind the PMC-overflow VIRQ and attach pmc_ovf_interrupt() to the resulting IRQ. */
+	ret = request_irq(bind_virq_to_irq(VIRQ_PMC_OVF),
+		pmc_ovf_interrupt, SA_INTERRUPT, "pmc_ovf", NULL);
+	if (ret)
+		goto unbind_virq;
+
+	/* The hypercall statuses below are stored but not propagated (behaviour unchanged). */
+	if (is_primary) {
+		ret = HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, (u64)NULL, (u64)NULL);
+		ret = HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, (u64)&counter_config, (u64)num_events);
+	}
+	ret = HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, (u64)NULL, (u64)NULL);
+
+	pmc_enabled = 1;
+	return 0;
+
+unbind_virq:
+	/*
+	 * request_irq() failed, so no handler was installed: calling free_irq()
+	 * here would release an IRQ we never owned.  Only drop the VIRQ binding.
+	 */
+	unbind_virq_from_irq(VIRQ_PMC_OVF);
+	return ret;
+}
+
+static void pmc_shutdown(void)
+{
+	int ret;
+	pmc_enabled = 0;
+	/* Counterpart of pmc_setup(): issue PMC_DISABLE_VIRQ, release the counters, then free the IRQ. */
+	ret = HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, (u64)NULL, (u64)NULL);
+	/* The hypercall statuses are stored in ret but never examined. */
+
+	if (is_primary) {
+		ret = HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, (u64)NULL, (u64)NULL);
+		/* Only taken when is_primary is set. */
+	}
+	/* NOTE(review): virq_to_phys() consults the calling CPU's table -- confirm this runs on the CPU that bound the VIRQ. */
+	free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL);
+	unbind_virq_from_irq(VIRQ_PMC_OVF);
+}
+
+static int pmc_start(void)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_START, (u64)NULL, (u64)NULL);
+	/* Returns the PMC_START hypercall status, or 0 when is_primary is clear and no hypercall is made. */
+	return ret;
+}
+/* oprofile "stop" hook: issues PMC_STOP when is_primary is set, then always logs the sample counters. */
+static void pmc_stop(void)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_STOP, (u64)NULL, (u64)NULL);
+	/* nmi_restarts is a u32 in shared_info, so it is printed with %u (it was %d). */
+	printk(KERN_INFO "pmc: oprofile samples %llu, active %llu, passive %llu, other %llu, buffering losses %llu, NMI restarted %u\n",
+		oprofile_samples, HYPERVISOR_shared_info->active_samples, HYPERVISOR_shared_info->passive_samples,
+		HYPERVISOR_shared_info->other_samples, HYPERVISOR_shared_info->samples_lost, HYPERVISOR_shared_info->nmi_restarts);
+}
+
+static int pmc_set_active(int *active_domains, unsigned int adomains)
+{
+	/* Pass the active-domain list to Xen via PMC_SET_ACTIVE; returns 0 without a hypercall when is_primary is clear. */
+	if (!is_primary)
+		return 0;
+	return HYPERVISOR_pmc_op(PMC_SET_ACTIVE,
+			(u64)active_domains, (u64)adomains);
+}
+
+static int pmc_set_passive(int *passive_domains, unsigned int pdomains)
+{
+	/* Pass the passive-domain list to Xen via PMC_SET_PASSIVE; returns 0 without a hypercall when is_primary is clear. */
+	if (!is_primary)
+		return 0;
+	return HYPERVISOR_pmc_op(PMC_SET_PASSIVE,
+			(u64)passive_domains, (u64)pdomains);
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int pmc_create_files(struct super_block * sb, struct dentry * root)
+{
+	/* Create one oprofilefs directory ("0", "1", ...) per event counter, each exposing that counter's config fields. */
+	unsigned int i, nr = num_events > 0 ? num_events : 0;
+	if (nr > OP_MAX_COUNTER)	/* num_events comes from Xen; counter_config[] has only OP_MAX_COUNTER slots */
+		nr = OP_MAX_COUNTER;
+	for (i = 0; i < nr; ++i) {
+		struct dentry * dir;
+		char buf[4];
+
+		snprintf(buf, sizeof(buf), "%u", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
+	}
+	return 0;
+}
+ 
+/* Xen PMC backend hooks; cpu_type is assigned during oprofile_arch_init() before the struct is copied to *ops. */
+struct oprofile_operations pmc_ops = {
+	.create_files 	= pmc_create_files,
+	.set_active	= pmc_set_active,
+	.set_passive	= pmc_set_passive,
+	.setup 		= pmc_setup,
+	.shutdown	= pmc_shutdown,
+	.start		= pmc_start,
+	.stop		= pmc_stop
+};
+ 
+
+static void __init p4_init(void)
+{
+	__u8 cpu_model = current_cpu_data.x86_model;
+
+	/* Pick cpu_type for a family-0xf CPU; models above 3 are reported as unknown (this used to be overwritten). */
+	if (cpu_model > 3)
+		pmc_ops.cpu_type = "type_unknown";
+	else	/* We always use a non-HT system because that gives us more events */
+		pmc_ops.cpu_type = "i386/p4";
+}
+
+
+static void __init ppro_init(void)
+{
+	__u8 cpu_model = current_cpu_data.x86_model;
+
+	/* Map a family-6 model to a cpu_type; models above 0xd now stay "type_unknown" instead of being overwritten. */
+	if (cpu_model > 0xd) {
+		pmc_ops.cpu_type = "type_unknown";
+	} else if (cpu_model == 9) {
+		pmc_ops.cpu_type = "i386/p6_mobile";
+	} else if (cpu_model > 5) {
+		pmc_ops.cpu_type = "i386/piii";
+	} else if (cpu_model > 2) {
+		pmc_ops.cpu_type = "i386/pii";
+	} else {
+		pmc_ops.cpu_type = "i386/ppro";
+	}
+}
+
+/* in order to get driverfs right */
+static int using_pmc;
+/* Issue PMC_INIT; on success choose cpu_type, register the sysdev and copy pmc_ops to *ops. Returns the PMC_INIT status. */
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+	int ret;	/* declared up front to avoid a declaration after a statement */
+
+	printk (KERN_INFO "oprofile_arch_init\n");
+	ret = HYPERVISOR_pmc_op(PMC_INIT, (u64)&num_events, (u64)&is_primary);
+	if (!ret) {
+		__u8 vendor = current_cpu_data.x86_vendor;
+		__u8 family = current_cpu_data.x86;
+
+		if (vendor == X86_VENDOR_INTEL) {
+			switch (family) {
+				/* Pentium IV */
+				case 0xf:
+					p4_init();
+					break;
+				/* A P6-class processor */
+				case 6:
+					ppro_init();
+					break;
+				default:
+					pmc_ops.cpu_type = "type_unknown";
+			}
+		} else pmc_ops.cpu_type = "type_unknown";
+		init_driverfs();	/* result is not checked (unchanged) */
+		using_pmc = 1;
+		*ops = pmc_ops;
+	}
+	printk (KERN_INFO "oprofile_arch_init: ret %d, events %d, is_primary %d\n", ret, num_events, is_primary);
+	return ret;
+}
+
+/* Teardown: undo the sysdev registration if init succeeded, and issue PMC_SHUTDOWN when is_primary is set. */
+void __exit oprofile_arch_exit(void)
+{
+	if (using_pmc)
+		exit_driverfs();
+
+	if (is_primary)
+		HYPERVISOR_pmc_op(PMC_SHUTDOWN, (u64)NULL, (u64)NULL);
+
+}
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile	2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile	2005-08-22 20:17:51 -05:00
@@ -69,7 +69,6 @@ libs-y 					+= arch/x86_64/lib/
 core-y					+= arch/xen/x86_64/kernel/ arch/xen/x86_64/mm/
 core-$(CONFIG_IA32_EMULATION)		+= arch/xen/x86_64/ia32/
 drivers-$(CONFIG_PCI)			+= arch/xen/x86_64/pci/
-drivers-$(CONFIG_OPROFILE)		+= arch/x86_64/oprofile/
 
 # for clean
 obj-	+= kernel/ mm/ pci/
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c	2005-08-22 20:17:51 -05:00
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@movementarian.org>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * This is the core of the buffer management. Each
  * CPU buffer is processed and entered into the
  * global event buffer. Such processing is necessary
@@ -265,13 +269,30 @@ static void add_cpu_switch(int i)
 	last_cookie = ~0UL;
 }
 
-static void add_kernel_ctx_switch(unsigned int in_kernel)
+static void add_cpu_mode_switch(unsigned int cpu_mode)
 {
 	add_event_entry(ESCAPE_CODE);
-	if (in_kernel)
-		add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
-	else
-		add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
+	switch (cpu_mode)
+	{
+	case CPU_MODE_USER:
+		add_event_entry(USER_ENTER_SWITCH_CODE);
+		break;
+	case CPU_MODE_KERNEL:
+		add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+		break;
+	case CPU_MODE_XEN:
+		add_event_entry(XEN_ENTER_SWITCH_CODE);
+		break;
+	default:
+		break;
+	}
+}
+/* Append a three-word record to the event buffer: ESCAPE_CODE, DOMAIN_SWITCH_CODE, then the domain id. */
+static void add_dom_switch(int domain_id)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(DOMAIN_SWITCH_CODE);
+	add_event_entry(domain_id);
 }
  
 static void
@@ -337,10 +358,9 @@ static int add_us_sample(struct mm_struc
  * sample is converted into a persistent dentry/offset pair
  * for later lookup from userspace.
  */
-static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
+static int add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
 {
-	if (in_kernel) {
+	if (cpu_mode >= CPU_MODE_KERNEL) {
 		add_sample_entry(s->eip, s->event);
 		return 1;
 	} else if (mm) {
@@ -374,6 +394,11 @@ static inline int is_code(unsigned long 
 {
 	return val == ESCAPE_CODE;
 }
+/* Non-zero when @val (a CPU-buffer eip field in sync_buffer()) equals the domain-switch marker. */
+static inline int is_dom_switch(unsigned long val)
+{
+	return val == DOMAIN_SWITCH_ESCAPE_CODE;
+}
  
 
 /* "acquire" as many cpu buffer slots as we can */
@@ -489,10 +514,11 @@ void sync_buffer(int cpu)
 	struct mm_struct *mm = NULL;
 	struct task_struct * new;
 	unsigned long cookie = 0;
-	int in_kernel = 1;
+	int cpu_mode = 1;
 	unsigned int i;
 	sync_buffer_state state = sb_buffer_start;
 	unsigned long available;
+	int domain_switch = 0;
 
 	down(&buffer_sem);
  
@@ -506,12 +532,12 @@ void sync_buffer(int cpu)
 		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
  
 		if (is_code(s->eip)) {
-			if (s->event <= CPU_IS_KERNEL) {
+			if (s->event <= CPU_MODE_MAX) {
 				/* kernel/userspace switch */
-				in_kernel = s->event;
+				cpu_mode = s->event;
 				if (state == sb_buffer_start)
 					state = sb_sample_start;
-				add_kernel_ctx_switch(s->event);
+				add_cpu_mode_switch(s->event);
 			} else if (s->event == CPU_TRACE_BEGIN) {
 				state = sb_bt_start;
 				add_trace_begin();
@@ -528,11 +554,23 @@ void sync_buffer(int cpu)
 				add_user_ctx_switch(new, cookie);
 			}
 		} else {
-			if (state >= sb_bt_start &&
-			    !add_sample(mm, s, in_kernel)) {
-				if (state == sb_bt_start) {
-					state = sb_bt_ignore;
-					atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+			if (is_dom_switch(s->eip)) {
+				add_dom_switch((int)(s->event));
+				domain_switch = 1;
+			}
+			else {
+				if (domain_switch) {
+					add_sample_entry (s->eip, s->event);
+					domain_switch = 0;
+				}
+				else {
+					if (state >= sb_bt_start &&
+			    		    !add_sample(mm, s, cpu_mode)) {
+						if (state == sb_bt_start) {
+							state = sb_bt_ignore;
+							atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+						}
+					}
 				}
 			}
 		}
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c	2005-08-22 20:17:51 -05:00
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@movementarian.org>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
  * Eventually each CPU's buffer is processed into the global
@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
 			goto fail;
  
 		b->last_task = NULL;
-		b->last_is_kernel = -1;
+		b->last_cpu_mode = -1;
 		b->tracing = 0;
 		b->buffer_size = buffer_size;
 		b->tail_pos = 0;
@@ -117,7 +121,7 @@ void cpu_buffer_reset(struct oprofile_cp
 	 * collected will populate the buffer with proper
 	 * values to initialize the buffer
 	 */
-	cpu_buf->last_is_kernel = -1;
+	cpu_buf->last_cpu_mode = -1;
 	cpu_buf->last_task = NULL;
 }
 
@@ -180,7 +184,7 @@ add_code(struct oprofile_cpu_buffer * bu
  * events whenever is_kernel changes
  */
 static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
-		      int is_kernel, unsigned long event)
+		      int cpu_mode, unsigned long event)
 {
 	struct task_struct * task;
 
@@ -191,24 +195,39 @@ static int log_sample(struct oprofile_cp
 		return 0;
 	}
 
-	is_kernel = !!is_kernel;
+	// Ensure a valid cpu mode
+	if (cpu_mode > CPU_MODE_XEN)
+		return 0;
 
 	task = current;
 
-	/* notice a switch from user->kernel or vice versa */
-	if (cpu_buf->last_is_kernel != is_kernel) {
-		cpu_buf->last_is_kernel = is_kernel;
-		add_code(cpu_buf, is_kernel);
-	}
 
-	/* notice a task switch */
-	if (cpu_buf->last_task != task) {
-		cpu_buf->last_task = task;
-		add_code(cpu_buf, (unsigned long)task);
+	/* We treat samples from other domains in a special manner: 
+           each sample is preceded by a record with eip equal to ~1UL. 
+           This record is non-sticky i.e. it holds only for the following 
+           sample. The event field of this record stores the domain id.*/ 
+	if (pc == DOMAIN_SWITCH_ESCAPE_CODE) {
+		add_sample(cpu_buf, pc, event);
+		return 1;
+	} else {
+		/* notice a switch from user->kernel or vice versa */
+		if (cpu_buf->last_cpu_mode != cpu_mode) {
+			cpu_buf->last_cpu_mode = cpu_mode;
+			add_code(cpu_buf, cpu_mode);
+		}
+
+		/* notice a task switch */
+		if (cpu_buf->last_task != task) {
+			cpu_buf->last_task = task;
+			add_code(cpu_buf, (unsigned long)task);
+		}
+
+		/* Note: at this point, we lose the cpu_mode of a sample
+		   if it is from another domain */
+
+		add_sample(cpu_buf, pc, event);
+		return 1;
 	}
- 
-	add_sample(cpu_buf, pc, event);
-	return 1;
 }
 
 static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
@@ -229,6 +248,14 @@ static void oprofile_end_trace(struct op
 	cpu_buf->tracing = 0;
 }
 
+void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode,
+	unsigned long event)
+{
+	/*
+	 * Log one sample into the calling CPU's buffer; log_sample()'s status is ignored.
+	 */
+	log_sample(&cpu_buffer[smp_processor_id()], eip, cpu_mode, event);
+}
 
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
 {
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h	2005-08-22 20:17:51 -05:00
@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
 	volatile unsigned long tail_pos;
 	unsigned long buffer_size;
 	struct task_struct * last_task;
-	int last_is_kernel;
+	int last_cpu_mode;
 	int tracing;
 	struct op_sample * buffer;
 	unsigned long sample_received;
@@ -51,7 +51,14 @@ extern struct oprofile_cpu_buffer cpu_bu
 void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
 
 /* transient events for the CPU buffer -> event buffer */
-#define CPU_IS_KERNEL 1
-#define CPU_TRACE_BEGIN 2
+#define CPU_MODE_USER    0
+#define CPU_MODE_KERNEL  1
+#define CPU_MODE_XEN     2
+#define CPU_MODE_MAX     2
+#define CPU_TRACE_BEGIN  3
 
+/* Special eip value in the CPU buffer: the next sample comes from another */
+/* Xen domain. Parenthesized so the macro is safe inside any expression. */
+#define DOMAIN_SWITCH_ESCAPE_CODE (~1UL)
+ 
 #endif /* OPROFILE_CPU_BUFFER_H */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c	2005-08-22 20:17:51 -05:00
@@ -56,6 +56,7 @@ void add_event_entry(unsigned long value
 /* Wake up the waiting process if any. This happens
  * on "echo 0 >/dev/oprofile/enable" so the daemon
  * processes the data remaining in the event buffer.
+ * also called on echo 1 > /dev/oprofile/dump
  */
 void wake_up_buffer_waiter(void)
 {
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h	2005-08-22 20:17:51 -05:00
@@ -5,6 +5,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef EVENT_BUFFER_H
@@ -29,11 +33,13 @@ void wake_up_buffer_waiter(void);
 #define CPU_SWITCH_CODE 		2
 #define COOKIE_SWITCH_CODE 		3
 #define KERNEL_ENTER_SWITCH_CODE	4
-#define KERNEL_EXIT_SWITCH_CODE		5
+#define USER_ENTER_SWITCH_CODE		5
 #define MODULE_LOADED_CODE		6
 #define CTX_TGID_CODE			7
 #define TRACE_BEGIN_CODE		8
 #define TRACE_END_CODE			9
+#define XEN_ENTER_SWITCH_CODE		10
+#define DOMAIN_SWITCH_CODE		11
  
 /* add data to the event buffer */
 void add_event_entry(unsigned long data);
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c	2005-08-22 20:17:51 -05:00
@@ -5,6 +5,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #include <linux/kernel.h>
@@ -32,6 +36,25 @@ static DECLARE_MUTEX(start_sem);
    1 - use the timer int mechanism regardless
  */
 static int timer = 0;
+
+extern unsigned int adomains, pdomains;
+extern int active_domains[MAX_OPROF_DOMAINS], passive_domains[MAX_OPROF_DOMAINS];
+
+int oprofile_set_active(void)
+{
+	/* Forward the active-domain list to the backend's set_active hook; -EINVAL when the hook is absent. */
+	if (!oprofile_ops.set_active)
+		return -EINVAL;
+	return oprofile_ops.set_active(active_domains, adomains);
+}
+
+int oprofile_set_passive(void)
+{
+	/* Forward the passive-domain list to the backend's set_passive hook; -EINVAL when the hook is absent. */
+	if (!oprofile_ops.set_passive)
+		return -EINVAL;
+	return oprofile_ops.set_passive(passive_domains, pdomains);
+}
 
 int oprofile_setup(void)
 {
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c	2005-08-22 20:17:51 -05:00
@@ -5,10 +5,16 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.	
  */
 
 #include <linux/fs.h>
 #include <linux/oprofile.h>
+#include <linux/pagemap.h>
+#include <linux/ctype.h>
 
 #include "event_buffer.h"
 #include "oprofile_stats.h"
@@ -117,11 +123,140 @@ static ssize_t dump_write(struct file * 
 static struct file_operations dump_fops = {
 	.write		= dump_write,
 };
- 
+
+#define TMPBUFSIZE 50
+
+unsigned int adomains = 0;	/* number of valid entries in active_domains[] */
+int active_domains[MAX_OPROF_DOMAINS];	/* int, matching the extern in oprof.c and the set_active(int *, ...) hook */
+
+extern int oprofile_set_active(void);
+
+static ssize_t adomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	char *startp = tmpbuf;
+	char *endp = tmpbuf;
+	int i;
+	long val;
+	/* Parse the domain-id list written to "active_domains" and apply it with oprofile_set_active(). */
+	if (*offset)
+		return -EINVAL;
+	if (!count)
+		return 0;
+	if (count > TMPBUFSIZE - 1)
+		return -EINVAL;
+	/* Zero-fill first so the copied text is always NUL-terminated. */
+	memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+	if (copy_from_user(tmpbuf, buf, count))
+		return -EFAULT;
+	/* Start from an empty list; unused slots hold -1. */
+	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+		active_domains[i] = -1;
+	adomains = 0;
+	/* Ids may be separated by punctuation or by blanks, the format adomain_read() prints. */
+	while (1) {
+		val = simple_strtol(startp, &endp, 0);
+		if (endp == startp)
+			break;
+		while (ispunct(*endp) || isspace(*endp))
+			endp++;
+		active_domains[adomains++] = val;
+		if (adomains >= MAX_OPROF_DOMAINS)
+			break;
+		startp = endp;
+	}
+	if (oprofile_set_active())
+		return -EINVAL;
+	return count;
+}
+/* Read handler for "active_domains": prints the ids as "%u " items followed by a newline. */
+static ssize_t adomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t len = 0;
+	int i;
+	/* scnprintf() returns what was actually stored, so len stays below TMPBUFSIZE; a list that does not fit is truncated. */
+	for (i = 0; i < adomains; i++)
+		len += scnprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", (unsigned int)active_domains[i]);
+	len += scnprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
+	return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len);
+}
+
+/* File operations behind the oprofilefs "active_domains" file. */
+static struct file_operations active_domain_ops = {
+	.read		= adomain_read,
+	.write		= adomain_write,
+};
+
+unsigned int pdomains = 0;	/* number of valid entries in passive_domains[] */
+int passive_domains[MAX_OPROF_DOMAINS];	/* int, matching the extern in oprof.c and the set_passive(int *, ...) hook */
+
+extern int oprofile_set_passive(void);
+
+static ssize_t pdomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	char *startp = tmpbuf;
+	char *endp = tmpbuf;
+	int i;
+	long val;
+	/* Parse the domain-id list written to "passive_domains" and apply it with oprofile_set_passive(). */
+	if (*offset)
+		return -EINVAL;
+	if (!count)
+		return 0;
+	if (count > TMPBUFSIZE - 1)
+		return -EINVAL;
+	/* Zero-fill first so the copied text is always NUL-terminated. */
+	memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+	if (copy_from_user(tmpbuf, buf, count))
+		return -EFAULT;
+	/* Start from an empty list; unused slots hold -1. */
+	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+		passive_domains[i] = -1;
+	pdomains = 0;
+	/* Ids may be separated by punctuation or by blanks, the format pdomain_read() prints. */
+	while (1) {
+		val = simple_strtol(startp, &endp, 0);
+		if (endp == startp)
+			break;
+		while (ispunct(*endp) || isspace(*endp))
+			endp++;
+		passive_domains[pdomains++] = val;
+		if (pdomains >= MAX_OPROF_DOMAINS)
+			break;
+		startp = endp;
+	}
+	if (oprofile_set_passive())
+		return -EINVAL;
+	return count;
+}
+/* Read handler for "passive_domains": prints the ids as "%u " items followed by a newline. */
+static ssize_t pdomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t len = 0;
+	int i;
+	/* scnprintf() returns what was actually stored, so len stays below TMPBUFSIZE; a list that does not fit is truncated. */
+	for (i = 0; i < pdomains; i++)
+		len += scnprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", (unsigned int)passive_domains[i]);
+	len += scnprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
+	return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len);
+}
+/* File operations behind the oprofilefs "passive_domains" file. */
+static struct file_operations passive_domain_ops = {
+	.read		= pdomain_read,
+	.write		= pdomain_write,
+};
+
 void oprofile_create_files(struct super_block * sb, struct dentry * root)
 {
 	oprofilefs_create_file(sb, root, "enable", &enable_fops);
 	oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
+	oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+	oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
 	oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
 	oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
 	oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h	2005-08-22 19:43:16 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h	2005-08-22 20:20:01 -05:00
@@ -576,4 +576,21 @@ HYPERVISOR_vcpu_pickle(
     return ret;
 }
 
+/* Issue the __HYPERVISOR_pmc_op hypercall: @op is one of the PMC_* commands, @arg1/@arg2 depend on the command. */
+static inline int
+HYPERVISOR_pmc_op(
+	int op, unsigned int arg1, unsigned int arg2)
+{
+	int ret;
+	unsigned long ign1, ign2, ign3;
+	/* "a" takes the hypercall number in and the result out; "b"/"c"/"d" take op, arg1, arg2 (their outputs are discarded). */
+	__asm__ __volatile__ (
+	       TRAP_INSTR
+	       : "=a"(ret), "=b"(ign1), "=c"(ign2), "=d"(ign3)
+	       : "0"(__HYPERVISOR_pmc_op), "1"(op), "2"(arg1), "3"(arg2)
+	       : "memory" );
+
+	return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h	2005-08-22 19:43:16 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h	2005-08-22 20:17:51 -05:00
@@ -519,4 +519,19 @@ HYPERVISOR_vcpu_pickle(
     return ret;
 }
 
+static inline int
+HYPERVISOR_pmc_op(
+	int op, u64 arg1, u64 arg2)
+{
+	int ret;
+	/* Hypercall __HYPERVISOR_pmc_op: number goes in via "a" (result returns there); op, arg1, arg2 go in "D", "S", "d". */
+	__asm__ __volatile__ (
+		TRAP_INSTR
+		: "=a"(ret)
+		: "0"(__HYPERVISOR_pmc_op), "D"(op), "S"(arg1), "d"(arg2)
+		: __syscall_clobber );
+
+	return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h	2005-08-22 19:43:14 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h	2005-08-22 20:17:51 -05:00
@@ -4,6 +4,10 @@
  * Guest OS interface to Xen.
  * 
  * Copyright (c) 2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef __XEN_PUBLIC_XEN_H__
@@ -59,6 +63,7 @@
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
 #define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_pmc_op               28
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -72,7 +77,8 @@
 #define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error.                    */
 #define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error.                       */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
-#define NR_VIRQS        7
+#define VIRQ_PMC_OVF	7  /* PMC Overflow */
+#define NR_VIRQS        8
 
 /*
  * MMU-UPDATE REQUESTS
@@ -240,6 +246,21 @@ struct mmuext_op {
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define PMC_INIT		0
+#define PMC_SET_ACTIVE		1
+#define PMC_SET_PASSIVE		2
+#define PMC_RESERVE_COUNTERS	3
+#define PMC_SETUP_EVENTS	4
+#define PMC_ENABLE_VIRQ		5
+#define PMC_START		6
+#define PMC_STOP		7
+#define PMC_DISABLE_VIRQ	8
+#define PMC_RELEASE_COUNTERS	9
+#define PMC_SHUTDOWN		10
+
 #ifndef __ASSEMBLY__
 
 typedef u16 domid_t;
@@ -292,6 +313,8 @@ typedef struct
 /* Event channel endpoints per domain. */
 #define NR_EVENT_CHANNELS 1024
 
+#define MAX_OPROF_EVENTS	32
+#define MAX_OPROF_DOMAINS	25	
 /*
  * Per-VCPU information goes here. This will be cleaned up more when Xen 
  * actually supports multi-VCPU guests.
@@ -407,6 +430,21 @@ typedef struct shared_info {
     u32 wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
 
     arch_shared_info_t arch;
+
+    /* Oprofile structures */
+    u8 event_head;
+    u8 event_tail;
+    struct {
+	u64 eip;
+	u8 mode;
+	u8 event;
+    } event_log[MAX_OPROF_EVENTS];
+    u8 losing_samples;
+    u64 samples_lost;
+    u32 nmi_restarts;
+    u64 active_samples;
+    u64 passive_samples;
+    u64 other_samples;
 
 } shared_info_t;
 
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h	2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h	2005-08-22 20:17:51 -05:00
@@ -8,6 +8,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef OPROFILE_H
@@ -27,6 +31,10 @@ struct oprofile_operations {
 	/* create any necessary configuration files in the oprofile fs.
 	 * Optional. */
 	int (*create_files)(struct super_block * sb, struct dentry * root);
+	/* setup active domains with Xen */
+	int (*set_active)(int *active_domains, unsigned int adomains);
+	/* setup passive domains with Xen */
+	int (*set_passive)(int *passive_domains, unsigned int pdomains);
 	/* Do any necessary interrupt setup. Optional. */
 	int (*setup)(void);
 	/* Do any necessary interrupt shutdown. Optional. */
@@ -60,6 +68,15 @@ void oprofile_arch_exit(void);
  * smp_processor_id() as cpu.
  */
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
+
+/**
+ * Alternative function to add a sample for Xen.
+ * It would be better to combine both functions into one, but this would
+ * require adding the cpu_mode parameter (formerly is_kernel) back to
+ * oprofile_add_sample() (Xen is the best place to determine cpu_mode).
+ */
+extern void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode, 
+	unsigned long event);
 
 /* Use this instead when the PC value is not from the regs. Doesn't
  * backtrace. */


[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH] xenoprofile x86_64
@ 2005-08-22 16:43 Santos, Jose Renato G
  2005-08-24 21:54 ` Andrew Theurer
  0 siblings, 1 reply; 3+ messages in thread
From: Santos, Jose Renato G @ 2005-08-22 16:43 UTC (permalink / raw)
  To: Andrew Theurer, xen-devel


  Good news Andrew. Thanks
  I will also include your changes on the new xenoprof version
  with SMP guest support when that is ready.

  Renato
  

>> -----Original Message-----
>> From: xen-devel-bounces@lists.xensource.com 
>> [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of 
>> Andrew Theurer
>> Sent: Monday, August 22, 2005 8:55 AM
>> To: xen-devel@lists.xensource.com
>> Subject: [Xen-devel] [PATCH] xenoprofile x86_64
>> 
>> 
>> Attached are patches for xenoprofile on x86_64. These are not 
>> "production ready", but they do work on EM64T so far. I have 
>> not added 
>> support for Opteron just yet (but will very soon). I wanted 
>> to get these 
>> out ASAP in case anyone wanted to try them. There are not too many 
>> changes from Renato's patches, mainly use of KERNEL_MODE instead of 
>> RING_1, u64's here and there, and new x86_64 specific files. 
>> I have not 
>> tested these patches on i386 (some changes needed). These 
>> should apply 
>> on changeset 6315.
>> 
>> -Andrew
>> 
>> Signed-off-by: Andrew Theurer <habanero@us.ibm.com>
>> 
>> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] xenoprofile x86_64
  2005-08-22 16:43 Santos, Jose Renato G
@ 2005-08-24 21:54 ` Andrew Theurer
  0 siblings, 0 replies; 3+ messages in thread
From: Andrew Theurer @ 2005-08-24 21:54 UTC (permalink / raw)
  To: xen-devel; +Cc: Santos, Jose Renato G

[-- Attachment #1: Type: text/plain, Size: 312 bytes --]

On Monday 22 August 2005 11:43, Santos, Jose Renato G wrote:
>   Good news Andrew. Thanks
>   I will also include your changes on the new xenoprof version
>   with SMP guest support when that is ready.

New patches attached, added support for AMD64

-Andrew

Signed-off-by: Andrew Theurer <habanero@us.ibm.com>


[-- Attachment #2: xenoprof-1.2-x86_64-linux.patch2 --]
[-- Type: text/x-diff, Size: 38420 bytes --]

diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32	2005-08-23 07:05:17.000000000 -0500
@@ -79,6 +79,12 @@ CONFIG_OBSOLETE_MODPARM=y
 CONFIG_KMOD=y
 
 #
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
 # X86 Processor Configuration
 #
 CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32	2005-08-23 07:05:17.000000000 -0500
@@ -76,6 +76,12 @@ CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
 
 #
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
 # X86 Processor Configuration
 #
 CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/i386/Makefile
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/i386/Makefile	2005-08-23 07:05:17.000000000 -0500
@@ -84,7 +84,6 @@ core-y					+= arch/xen/i386/kernel/ \
 drivers-$(CONFIG_MATH_EMULATION)	+= arch/i386/math-emu/
 drivers-$(CONFIG_PCI)			+= arch/xen/i386/pci/
 # must be linked after kernel/
-drivers-$(CONFIG_OPROFILE)		+= arch/i386/oprofile/
 drivers-$(CONFIG_PM)			+= arch/i386/power/
 
 # for clean
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/Kconfig xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/Kconfig
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/Kconfig	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/Kconfig	2005-08-23 07:05:17.000000000 -0500
@@ -200,4 +200,6 @@ source "crypto/Kconfig"
 
 source "lib/Kconfig"
 
+source "arch/xen/oprofile/Kconfig"
+
 source "arch/xen/Kconfig.debug"
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c	2005-08-23 07:05:17.000000000 -0500
@@ -44,11 +44,16 @@
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/evtchn.h>
 
+int virq_to_phys(int virq);
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 EXPORT_SYMBOL(force_evtchn_callback);
 EXPORT_SYMBOL(evtchn_do_upcall);
 EXPORT_SYMBOL(bind_evtchn_to_irq);
 EXPORT_SYMBOL(unbind_evtchn_from_irq);
+EXPORT_SYMBOL(virq_to_phys);
+EXPORT_SYMBOL(bind_virq_to_irq);
+EXPORT_SYMBOL(unbind_virq_from_irq);
 #endif
 
 /*
@@ -180,6 +185,15 @@ static int find_unbound_irq(void)
     return irq;
 }
 
+/* Look up the entry for @virq in the current CPU's virq_to_irq table.
+ * Returns -1 when @virq is outside [0, NR_VIRQS). */
+int virq_to_phys(int virq)
+{
+	if (virq < 0 || virq >= NR_VIRQS)
+		return -1;
+	return per_cpu(virq_to_irq, smp_processor_id())[virq];
+}
+
 int bind_virq_to_irq(int virq)
 {
     evtchn_op_t op;
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/Makefile xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/Makefile
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/Makefile	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/Makefile	2005-08-23 07:05:17.000000000 -0500
@@ -32,6 +32,8 @@ ifneq ($(KBUILD_SRC),)
 	$(Q)ln -fsn ../include/asm-$(XENARCH) include2/asm
 endif
 
+drivers-$(CONFIG_OPROFILE)    += arch/xen/oprofile/
+
 include/.asm-ignore: include/asm
 	@rm -f include/.asm-ignore
 	@mv include/asm include/.asm-ignore
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+	  
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, including the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/Makefile
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/Makefile	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o  \
+		timer_int.o )
+
+oprofile-y				:= $(DRIVER_OBJS) pmc.o
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c	2005-08-23 07:31:26.000000000 -0500
@@ -0,0 +1,334 @@
+/**
+ * @file pmc.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+ 
+#include "op_counter.h"
+ 
+static int pmc_start(void);
+static void pmc_stop(void);
+
+/* 0 == registered but off, 1 == registered and on */
+static int pmc_enabled = 0;
+static int num_events = 0;
+static int is_primary = 0;
+
+#ifdef CONFIG_PM
+
+static int pmc_suspend(struct sys_device *dev, u32 state)
+{
+	if (pmc_enabled == 1)
+		pmc_stop();
+	return 0;
+}
+
+
+static int pmc_resume(struct sys_device *dev)
+{
+	if (pmc_enabled == 1)
+		pmc_start();
+	return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+	set_kset_name("oprofile"),
+	.resume		= pmc_resume,
+	.suspend	= pmc_suspend,
+};
+
+
+static struct sys_device device_oprofile = {
+	.id	= 0,
+	.cls	= &oprofile_sysclass,
+};
+
+
+static int __init init_driverfs(void)
+{
+	int error;
+	if (!(error = sysdev_class_register(&oprofile_sysclass)))
+		error = sysdev_register(&device_oprofile);
+	return error;
+}
+
+
+static void __exit exit_driverfs(void)
+{
+	sysdev_unregister(&device_oprofile);
+	sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+
+static irqreturn_t pmc_ovf_interrupt (int irq, void *dev_id, struct pt_regs *regs)
+{
+	int head, tail;
+	shared_info_t *s = HYPERVISOR_shared_info;
+
+	head = s->event_head;
+	tail = s->event_tail;
+
+	/* oprofile_add_sample will also handle samples from other domains */
+
+	if (tail > head) {
+		while (tail < MAX_OPROF_EVENTS) {
+			oprofile_add_sample_xen(s->event_log[tail].eip, 
+				s->event_log[tail].mode, 
+				s->event_log[tail].event);
+			/* tail is past head, so event_log[] is treated as
+			 * having wrapped: drain slots tail..MAX_OPROF_EVENTS-1
+			 * here, then continue from slot 0 below. */
+			oprofile_samples++;
+			tail++;
+		}
+		tail = 0;
+	}
+	while (tail < head) {
+		oprofile_add_sample_xen(s->event_log[tail].eip, 
+			s->event_log[tail].mode, s->event_log[tail].event);
+		/* Drain slots tail..head-1; the resulting tail is stored
+		 * back into s->event_tail once the loop finishes.
+		 * oprofile_samples counts every entry forwarded. */
+		oprofile_samples++;
+		tail++;
+	}
+
+	s->event_tail = tail;
+	s->losing_samples = 0;
+
+	return IRQ_HANDLED;
+}
+
+extern int virq_to_phys(int virq);
+
+static int pmc_setup(void)
+{
+	int ret;
+
+	/* Bind the PMC overflow VIRQ to an IRQ and install the handler. */
+	ret = request_irq(bind_virq_to_irq(VIRQ_PMC_OVF),
+		pmc_ovf_interrupt, SA_INTERRUPT, "pmc_ovf", NULL);
+	if (ret)
+		goto unbind_virq;
+
+	if (is_primary) {
+		/* NOTE(review): the hypercall results below are overwritten and
+		 * never checked; setup reports success regardless. */
+		ret = HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, (u64)NULL, (u64)NULL);
+		ret = HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, (u64)&counter_config, (u64)num_events);
+	}
+
+	ret = HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, (u64)NULL, (u64)NULL);
+
+	pmc_enabled = 1;
+	return 0;
+
+unbind_virq:
+	/* request_irq() failed, so no handler was installed: undo only the
+	 * VIRQ binding.  free_irq() must not be called for an IRQ we never got. */
+	unbind_virq_from_irq(VIRQ_PMC_OVF);
+	return ret;
+}
+
+static void pmc_shutdown(void)
+{
+	int ret;
+	pmc_enabled = 0;
+
+	ret = HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, (u64)NULL, (u64)NULL);
+	//printk(KERN_INFO "pmc_shutdown: disable_virq: ret %d\n", ret);
+
+	if (is_primary) {
+		ret = HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, (u64)NULL, (u64)NULL);
+		//printk(KERN_INFO "pmc_shutdown: release_counters: ret %d\n", ret);
+	}
+
+	free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL);
+	unbind_virq_from_irq(VIRQ_PMC_OVF);
+}
+
+static int pmc_start(void)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_START, (u64)NULL, (u64)NULL);
+	//printk(KERN_INFO "pmc_start: ret %d\n", ret);
+	return ret;
+}
+ 
+/* Stop sampling (primary domain only) and log the sample counters. */
+static void pmc_stop(void)
+{
+	if (is_primary)
+		HYPERVISOR_pmc_op(PMC_STOP, (u64)NULL, (u64)NULL);
+	/* nmi_restarts is a u32, hence %u (was %d) */
+	printk(KERN_INFO "pmc: oprofile samples %llu, active %llu, passive %llu, other %llu, buffering losses %llu, NMI restarted %u\n", 
+		oprofile_samples, HYPERVISOR_shared_info->active_samples, HYPERVISOR_shared_info->passive_samples,
+		HYPERVISOR_shared_info->other_samples, HYPERVISOR_shared_info->samples_lost, HYPERVISOR_shared_info->nmi_restarts);
+}
+
+static int pmc_set_active(int *active_domains, unsigned int adomains)
+{
+	int ret = 0;
+	if (is_primary) 
+		ret = HYPERVISOR_pmc_op(PMC_SET_ACTIVE, 
+			(u64)active_domains, (u64)adomains); 
+	return ret;
+}
+
+static int pmc_set_passive(int *passive_domains, unsigned int pdomains)
+{
+	int ret = 0;
+	if (is_primary)
+		ret = HYPERVISOR_pmc_op(PMC_SET_PASSIVE,
+			(u64)passive_domains, (u64)pdomains);
+	return ret;
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int pmc_create_files(struct super_block * sb, struct dentry * root)
+{
+	unsigned int i;
+
+	/* num_events is written via the PMC_INIT hypercall: never index past
+	 * the OP_MAX_COUNTER entries of counter_config[], and do nothing if <= 0. */
+	for (i = 0; i < OP_MAX_COUNTER && (int)i < num_events; ++i) {
+		struct dentry * dir;
+		char buf[4];
+		snprintf(buf, sizeof(buf), "%u", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+		oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled); 
+		oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event); 
+		oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count); 
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask); 
+		oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel); 
+		oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user); 
+	}
+
+	return 0;
+}
+ 
+ 
+struct oprofile_operations pmc_ops = {
+	.create_files 	= pmc_create_files,
+	.set_active	= pmc_set_active,
+	.set_passive	= pmc_set_passive,
+	.setup 		= pmc_setup,
+	.shutdown	= pmc_shutdown,
+	.start		= pmc_start,
+	.stop		= pmc_stop
+};
+ 
+
+static void __init p4_init(void)
+{
+	/* Models above 3 are not recognised and must stay "type_unknown"; the
+	 * old code overwrote that unconditionally.  For known models we always
+	 * use a non-HT system because that gives us more events. */
+	if (current_cpu_data.x86_model > 3)
+		pmc_ops.cpu_type = "type_unknown";
+	else
+		pmc_ops.cpu_type = "i386/p4";
+}
+
+
+static void __init ppro_init(void)
+{
+	__u8 cpu_model = current_cpu_data.x86_model;
+
+	/* Test the unknown-model case first, as part of the chain, so a later branch cannot overwrite it. */
+	if (cpu_model > 0xd) {
+		pmc_ops.cpu_type = "type_unknown";
+	} else if (cpu_model == 9) {
+		pmc_ops.cpu_type = "i386/p6_mobile";
+	} else if (cpu_model > 5) {
+		pmc_ops.cpu_type = "i386/piii";
+	} else if (cpu_model > 2) {
+		pmc_ops.cpu_type = "i386/pii";
+	} else {
+		pmc_ops.cpu_type = "i386/ppro";
+	}
+}
+
+/* in order to get driverfs right */
+static int using_pmc;
+
+/* Issue the PMC_INIT hypercall; on success pick a cpu_type and copy pmc_ops to *ops. */
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+	__u8 vendor = current_cpu_data.x86_vendor;
+	__u8 family = current_cpu_data.x86;
+	int ret;
+
+	printk (KERN_INFO "oprofile_arch_init\n");
+	ret = HYPERVISOR_pmc_op(PMC_INIT, (u64)&num_events, (u64)&is_primary);
+	if (!ret) {
+		if (vendor == X86_VENDOR_INTEL) {
+			switch (family) {
+				/* Pentium IV */
+				case 0xf:
+					p4_init();
+					break;
+				/* A P6-class processor */
+				case 6:
+					ppro_init();
+					break;
+				default:
+					pmc_ops.cpu_type = "type_unknown";
+			} 
+		} else if (vendor == X86_VENDOR_AMD) {
+			switch (family) {
+				case 6:
+					pmc_ops.cpu_type = "i386/athlon";
+					break;
+				case 0xf:
+					pmc_ops.cpu_type = "x86-64/hammer";
+					break;
+				default:
+					pmc_ops.cpu_type = "type_unknown";
+			}
+		} else pmc_ops.cpu_type = "type_unknown";
+
+		init_driverfs();
+		using_pmc = 1;
+		*ops = pmc_ops;
+	}
+	printk (KERN_INFO "oprofile_arch_init: ret %d, events %d, is_primary %d\n", ret, num_events, is_primary);
+	return ret;
+}
+
+void __exit oprofile_arch_exit(void)
+{
+	if (using_pmc)
+		exit_driverfs();
+
+	if (is_primary)
+		HYPERVISOR_pmc_op(PMC_SHUTDOWN, (u64)NULL, (u64)NULL);
+
+}
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/x86_64/Makefile
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/arch/xen/x86_64/Makefile	2005-08-23 07:05:17.000000000 -0500
@@ -69,7 +69,6 @@ libs-y 					+= arch/x86_64/lib/
 core-y					+= arch/xen/x86_64/kernel/ arch/xen/x86_64/mm/
 core-$(CONFIG_IA32_EMULATION)		+= arch/xen/x86_64/ia32/
 drivers-$(CONFIG_PCI)			+= arch/xen/x86_64/pci/
-drivers-$(CONFIG_OPROFILE)		+= arch/x86_64/oprofile/
 
 # for clean
 obj-	+= kernel/ mm/ pci/
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c	2005-08-23 07:05:17.000000000 -0500
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@movementarian.org>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * This is the core of the buffer management. Each
  * CPU buffer is processed and entered into the
  * global event buffer. Such processing is necessary
@@ -265,13 +269,30 @@ static void add_cpu_switch(int i)
 	last_cookie = ~0UL;
 }
 
-static void add_kernel_ctx_switch(unsigned int in_kernel)
+static void add_cpu_mode_switch(unsigned int cpu_mode)
 {
 	add_event_entry(ESCAPE_CODE);
-	if (in_kernel)
-		add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
-	else
-		add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
+	switch (cpu_mode)
+	{
+	case CPU_MODE_USER:
+		add_event_entry(USER_ENTER_SWITCH_CODE);
+		break;
+	case CPU_MODE_KERNEL:
+		add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+		break;
+	case CPU_MODE_XEN:
+		add_event_entry(XEN_ENTER_SWITCH_CODE);
+		break;
+	default:
+		break;
+	}
+}
+
+static void add_dom_switch(int domain_id)
+{
+	add_event_entry(ESCAPE_CODE);
+	add_event_entry(DOMAIN_SWITCH_CODE);
+	add_event_entry(domain_id);
 }
  
 static void
@@ -337,10 +358,9 @@ static int add_us_sample(struct mm_struc
  * sample is converted into a persistent dentry/offset pair
  * for later lookup from userspace.
  */
-static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
+static int add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
 {
-	if (in_kernel) {
+	if (cpu_mode >= CPU_MODE_KERNEL) {
 		add_sample_entry(s->eip, s->event);
 		return 1;
 	} else if (mm) {
@@ -374,6 +394,11 @@ static inline int is_code(unsigned long 
 {
 	return val == ESCAPE_CODE;
 }
+
+static inline int is_dom_switch(unsigned long val)
+{
+	return val == DOMAIN_SWITCH_ESCAPE_CODE;
+}
  
 
 /* "acquire" as many cpu buffer slots as we can */
@@ -489,10 +514,11 @@ void sync_buffer(int cpu)
 	struct mm_struct *mm = NULL;
 	struct task_struct * new;
 	unsigned long cookie = 0;
-	int in_kernel = 1;
+	int cpu_mode = 1;
 	unsigned int i;
 	sync_buffer_state state = sb_buffer_start;
 	unsigned long available;
+	int domain_switch = 0;
 
 	down(&buffer_sem);
  
@@ -506,12 +532,12 @@ void sync_buffer(int cpu)
 		struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
  
 		if (is_code(s->eip)) {
-			if (s->event <= CPU_IS_KERNEL) {
+			if (s->event <= CPU_MODE_MAX) {
 				/* kernel/userspace switch */
-				in_kernel = s->event;
+				cpu_mode = s->event;
 				if (state == sb_buffer_start)
 					state = sb_sample_start;
-				add_kernel_ctx_switch(s->event);
+				add_cpu_mode_switch(s->event);
 			} else if (s->event == CPU_TRACE_BEGIN) {
 				state = sb_bt_start;
 				add_trace_begin();
@@ -528,11 +554,23 @@ void sync_buffer(int cpu)
 				add_user_ctx_switch(new, cookie);
 			}
 		} else {
-			if (state >= sb_bt_start &&
-			    !add_sample(mm, s, in_kernel)) {
-				if (state == sb_bt_start) {
-					state = sb_bt_ignore;
-					atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+			if (is_dom_switch(s->eip)) {
+				add_dom_switch((int)(s->event));
+				domain_switch = 1;
+			}
+			else {
+				if (domain_switch) {
+					add_sample_entry (s->eip, s->event);
+					domain_switch = 0;
+				}
+				else {
+					if (state >= sb_bt_start &&
+			    		    !add_sample(mm, s, cpu_mode)) {
+						if (state == sb_bt_start) {
+							state = sb_bt_ignore;
+							atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+						}
+					}
 				}
 			}
 		}
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c	2005-08-23 07:05:17.000000000 -0500
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@movementarian.org>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
  * Eventually each CPU's buffer is processed into the global
@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
 			goto fail;
  
 		b->last_task = NULL;
-		b->last_is_kernel = -1;
+		b->last_cpu_mode = -1;
 		b->tracing = 0;
 		b->buffer_size = buffer_size;
 		b->tail_pos = 0;
@@ -117,7 +121,7 @@ void cpu_buffer_reset(struct oprofile_cp
 	 * collected will populate the buffer with proper
 	 * values to initialize the buffer
 	 */
-	cpu_buf->last_is_kernel = -1;
+	cpu_buf->last_cpu_mode = -1;
 	cpu_buf->last_task = NULL;
 }
 
@@ -180,7 +184,7 @@ add_code(struct oprofile_cpu_buffer * bu
  * events whenever is_kernel changes
  */
 static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
-		      int is_kernel, unsigned long event)
+		      int cpu_mode, unsigned long event)
 {
 	struct task_struct * task;
 
@@ -191,24 +195,39 @@ static int log_sample(struct oprofile_cp
 		return 0;
 	}
 
-	is_kernel = !!is_kernel;
+	// Ensure a valid cpu mode
+	if (cpu_mode > CPU_MODE_XEN)
+		return 0;
 
 	task = current;
 
-	/* notice a switch from user->kernel or vice versa */
-	if (cpu_buf->last_is_kernel != is_kernel) {
-		cpu_buf->last_is_kernel = is_kernel;
-		add_code(cpu_buf, is_kernel);
-	}
 
-	/* notice a task switch */
-	if (cpu_buf->last_task != task) {
-		cpu_buf->last_task = task;
-		add_code(cpu_buf, (unsigned long)task);
+	/* We treat samples from other domains in a special manner: 
+           each sample is preceded by a record with eip equal to ~1UL. 
+           This record is non-sticky i.e. it holds only for the following 
+           sample. The event field of this record stores the domain id.*/ 
+	if (pc == DOMAIN_SWITCH_ESCAPE_CODE) {
+		add_sample(cpu_buf, pc, event);
+		return 1;
+	} else {
+		/* notice a switch from user->kernel or vice versa */
+		if (cpu_buf->last_cpu_mode != cpu_mode) {
+			cpu_buf->last_cpu_mode = cpu_mode;
+			add_code(cpu_buf, cpu_mode);
+		}
+
+		/* notice a task switch */
+		if (cpu_buf->last_task != task) {
+			cpu_buf->last_task = task;
+			add_code(cpu_buf, (unsigned long)task);
+		}
+
+		/* Note: at this point, we lose the cpu_mode of a sample
+		   if it is from another domain */
+
+		add_sample(cpu_buf, pc, event);
+		return 1;
 	}
- 
-	add_sample(cpu_buf, pc, event);
-	return 1;
 }
 
 static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
@@ -229,6 +248,14 @@ static void oprofile_end_trace(struct op
 	cpu_buf->tracing = 0;
 }
 
+/* Xen variant of oprofile_add_sample(): the caller passes eip and cpu_mode
+ * directly instead of a pt_regs, and the sample is logged to the buffer of
+ * the CPU this runs on. */
+void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode, 
+	unsigned long event)
+{
+	log_sample(&cpu_buffer[smp_processor_id()], eip, cpu_mode, event);
+}
 
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
 {
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h	2005-08-23 07:05:17.000000000 -0500
@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
 	volatile unsigned long tail_pos;
 	unsigned long buffer_size;
 	struct task_struct * last_task;
-	int last_is_kernel;
+	int last_cpu_mode;
 	int tracing;
 	struct op_sample * buffer;
 	unsigned long sample_received;
@@ -51,7 +51,14 @@ extern struct oprofile_cpu_buffer cpu_bu
 void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
 
 /* transient events for the CPU buffer -> event buffer */
-#define CPU_IS_KERNEL 1
-#define CPU_TRACE_BEGIN 2
+#define CPU_MODE_USER    0
+#define CPU_MODE_KERNEL  1
+#define CPU_MODE_XEN     2
+#define CPU_MODE_MAX     2
+#define CPU_TRACE_BEGIN  3
 
+/* special escape code for indicating next sample in the CPU */
+/* buffer is from another Xen domain (parenthesised so it expands safely) */
+#define DOMAIN_SWITCH_ESCAPE_CODE (~1UL)
+ 
 #endif /* OPROFILE_CPU_BUFFER_H */
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c	2005-08-23 07:05:17.000000000 -0500
@@ -56,6 +56,7 @@ void add_event_entry(unsigned long value
 /* Wake up the waiting process if any. This happens
  * on "echo 0 >/dev/oprofile/enable" so the daemon
  * processes the data remaining in the event buffer.
+ * also called on echo 1 > /dev/oprofile/dump
  */
 void wake_up_buffer_waiter(void)
 {
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h	2005-08-23 07:05:17.000000000 -0500
@@ -5,6 +5,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef EVENT_BUFFER_H
@@ -29,11 +33,13 @@ void wake_up_buffer_waiter(void);
 #define CPU_SWITCH_CODE 		2
 #define COOKIE_SWITCH_CODE 		3
 #define KERNEL_ENTER_SWITCH_CODE	4
-#define KERNEL_EXIT_SWITCH_CODE		5
+#define USER_ENTER_SWITCH_CODE		5
 #define MODULE_LOADED_CODE		6
 #define CTX_TGID_CODE			7
 #define TRACE_BEGIN_CODE		8
 #define TRACE_END_CODE			9
+#define XEN_ENTER_SWITCH_CODE		10
+#define DOMAIN_SWITCH_CODE		11
  
 /* add data to the event buffer */
 void add_event_entry(unsigned long data);
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/oprof.c
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/oprof.c	2005-08-23 07:05:17.000000000 -0500
@@ -5,6 +5,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #include <linux/kernel.h>
@@ -33,6 +37,25 @@ static DECLARE_MUTEX(start_sem);
  */
 static int timer = 0;
 
+extern unsigned int adomains, pdomains;
+extern int active_domains[MAX_OPROF_DOMAINS], passive_domains[MAX_OPROF_DOMAINS];
+
+int oprofile_set_active(void)
+{
+	if (oprofile_ops.set_active)
+		return oprofile_ops.set_active(active_domains, adomains);
+
+	return -EINVAL;
+}
+
+int oprofile_set_passive(void)
+{
+	if (oprofile_ops.set_passive)
+		return oprofile_ops.set_passive(passive_domains, pdomains);
+
+	return -EINVAL;
+}
+
 int oprofile_setup(void)
 {
 	int err;
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c	2005-08-23 07:05:17.000000000 -0500
@@ -5,10 +5,16 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.	
  */
 
 #include <linux/fs.h>
 #include <linux/oprofile.h>
+#include <linux/pagemap.h>
+#include <linux/ctype.h>
 
 #include "event_buffer.h"
 #include "oprofile_stats.h"
@@ -117,11 +123,140 @@ static ssize_t dump_write(struct file * 
 static struct file_operations dump_fops = {
 	.write		= dump_write,
 };
- 
+
+#define TMPBUFSIZE 50
+
+unsigned int adomains = 0;	/* number of valid entries in active_domains[] */
+int active_domains[MAX_OPROF_DOMAINS];	/* int, to match the extern in oprof.c and the set_active(int *, ...) hook */
+
+extern int oprofile_set_active(void);
+
+static ssize_t adomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	char *startp = tmpbuf;
+	char *endp = tmpbuf;
+	int i;
+	unsigned long val;
+	/* Write handler: replaces active_domains[] with the numbers parsed from buf. */
+	if (*offset)
+		return -EINVAL;	
+	if (!count)
+		return 0;
+	if (count > TMPBUFSIZE - 1)	/* must leave room for the terminating NUL */
+		return -EINVAL;
+	/* Zero-fill first so the copied text is always NUL-terminated. */
+	memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+	if (copy_from_user(tmpbuf, buf, count))
+		return -EFAULT;
+	/* Forget the old list: every slot is reset to -1 and the count to 0. */
+	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+		active_domains[i] = -1;
+	adomains = 0;
+	/* Parse numbers separated by punctuation until none is found or the table is full. */
+	while (1) {
+		val = simple_strtol(startp, &endp, 0);
+		if (endp == startp)	/* nothing was consumed: end of the list */
+			break;
+		while (ispunct(*endp))	/* skip the separator(s) after the number */
+			endp++;
+		active_domains[adomains++] = val;
+		if (adomains >= MAX_OPROF_DOMAINS)
+			break;
+		startp = endp;
+	}
+	if (oprofile_set_active())	/* any non-zero result is reported as -EINVAL */
+		return -EINVAL; 
+	return count;
+}
+
+static ssize_t adomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t len = 0;	/* bytes stored in tmpbuf so far */
+	unsigned int i;
+	/* scnprintf() returns the bytes actually stored, so a long list is truncated instead of overrunning tmpbuf */
+	for (i = 0; i < adomains; i++)	/* the "- 1" keeps one byte back for the newline below */
+		len += scnprintf(tmpbuf + len, TMPBUFSIZE - 1 - len, "%u ", (unsigned int)active_domains[i]);
+	len += scnprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
+	return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len);
+}
+
+
+static struct file_operations active_domain_ops = {
+	.read		= adomain_read,
+	.write		= adomain_write,
+};
+
+unsigned int pdomains = 0;	/* number of valid entries in passive_domains[] */
+int passive_domains[MAX_OPROF_DOMAINS];	/* int, to match the extern in oprof.c and the set_passive(int *, ...) hook */
+
+extern int oprofile_set_passive(void);
+
+static ssize_t pdomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	char *startp = tmpbuf;
+	char *endp = tmpbuf;
+	int i;
+	unsigned long val;
+	/* Write handler: replaces passive_domains[] with the numbers parsed from buf. */
+	if (*offset)
+		return -EINVAL;	
+	if (!count)
+		return 0;
+	if (count > TMPBUFSIZE - 1)	/* must leave room for the terminating NUL */
+		return -EINVAL;
+	/* Zero-fill first so the copied text is always NUL-terminated. */
+	memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+	if (copy_from_user(tmpbuf, buf, count))
+		return -EFAULT;
+	/* Forget the old list: every slot is reset to -1 and the count to 0. */
+	for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+		passive_domains[i] = -1;
+	pdomains = 0;
+	/* Parse numbers separated by punctuation until none is found or the table is full. */
+	while (1) {
+		val = simple_strtol(startp, &endp, 0);
+		if (endp == startp)	/* nothing was consumed: end of the list */
+			break;
+		while (ispunct(*endp))	/* skip the separator(s) after the number */
+			endp++;
+		passive_domains[pdomains++] = val;
+		if (pdomains >= MAX_OPROF_DOMAINS)
+			break;
+		startp = endp;
+	}
+	if (oprofile_set_passive())	/* any non-zero result is reported as -EINVAL */
+		return -EINVAL; 
+	return count;
+}
+
+static ssize_t pdomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+	char tmpbuf[TMPBUFSIZE];
+	size_t len = 0;	/* bytes stored in tmpbuf so far */
+	unsigned int i;
+	/* scnprintf() returns the bytes actually stored, so a long list is truncated instead of overrunning tmpbuf */
+	for (i = 0; i < pdomains; i++)	/* the "- 1" keeps one byte back for the newline below */
+		len += scnprintf(tmpbuf + len, TMPBUFSIZE - 1 - len, "%u ", (unsigned int)passive_domains[i]);
+	len += scnprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
+	return simple_read_from_buffer((void __user *)buf, count, offset, tmpbuf, len);
+}
+
+static struct file_operations passive_domain_ops = {
+	.read		= pdomain_read,
+	.write		= pdomain_write,
+};
+
 void oprofile_create_files(struct super_block * sb, struct dentry * root)
 {
 	oprofilefs_create_file(sb, root, "enable", &enable_fops);
 	oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
+	oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+	oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
 	oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
 	oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
 	oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/asm-x86_64/offset.h xen-unstable.hg-20050823/linux-2.6.12-xen0/include/asm-x86_64/offset.h
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/asm-x86_64/offset.h	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/include/asm-x86_64/offset.h	2005-08-23 07:06:19.000000000 -0500
@@ -0,0 +1,43 @@
+#ifndef __ASM_OFFSETS_H__
+#define __ASM_OFFSETS_H__
+/*
+ * DO NOT MODIFY.
+ *
+ * This file was generated by arch/xen/Makefile
+ *
+ */
+
+#define tsk_state 0 /* offsetof(struct task_struct, state) */
+#define tsk_flags 24 /* offsetof(struct task_struct, flags) */
+#define tsk_thread 752 /* offsetof(struct task_struct, thread) */
+#define tsk_pid 252 /* offsetof(struct task_struct, pid) */
+
+#define threadinfo_flags 16 /* offsetof(struct thread_info, flags) */
+#define threadinfo_addr_limit 32 /* offsetof(struct thread_info, addr_limit) */
+#define threadinfo_preempt_count 28 /* offsetof(struct thread_info, preempt_count) */
+
+#define pda_kernelstack 24 /* offsetof(struct x8664_pda, kernelstack) */
+#define pda_oldrsp 32 /* offsetof(struct x8664_pda, oldrsp) */
+#define pda_pcurrent 0 /* offsetof(struct x8664_pda, pcurrent) */
+#define pda_irqrsp 40 /* offsetof(struct x8664_pda, irqrsp) */
+#define pda_irqcount 48 /* offsetof(struct x8664_pda, irqcount) */
+#define pda_cpunumber 52 /* offsetof(struct x8664_pda, cpunumber) */
+#define pda_irqstackptr 56 /* offsetof(struct x8664_pda, irqstackptr) */
+
+#define IA32_SIGCONTEXT_eax 44 /* offsetof(struct sigcontext_ia32, eax) */
+#define IA32_SIGCONTEXT_ebx 32 /* offsetof(struct sigcontext_ia32, ebx) */
+#define IA32_SIGCONTEXT_ecx 40 /* offsetof(struct sigcontext_ia32, ecx) */
+#define IA32_SIGCONTEXT_edx 36 /* offsetof(struct sigcontext_ia32, edx) */
+#define IA32_SIGCONTEXT_esi 20 /* offsetof(struct sigcontext_ia32, esi) */
+#define IA32_SIGCONTEXT_edi 16 /* offsetof(struct sigcontext_ia32, edi) */
+#define IA32_SIGCONTEXT_ebp 24 /* offsetof(struct sigcontext_ia32, ebp) */
+#define IA32_SIGCONTEXT_esp 28 /* offsetof(struct sigcontext_ia32, esp) */
+#define IA32_SIGCONTEXT_eip 56 /* offsetof(struct sigcontext_ia32, eip) */
+
+#define IA32_RT_SIGFRAME_sigcontext 164 /* offsetof (struct rt_sigframe32, uc.uc_mcontext) */
+
+#define pbe_address 0 /* offsetof(struct pbe, address) */
+#define pbe_orig_address 8 /* offsetof(struct pbe, orig_address) */
+#define pbe_next 24 /* offsetof(struct pbe, next) */
+
+#endif
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h xen-unstable.hg-20050823/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h	2005-08-23 07:05:17.000000000 -0500
@@ -576,4 +576,21 @@ HYPERVISOR_vcpu_pickle(
     return ret;
 }
 
+
+static inline int
+HYPERVISOR_pmc_op(
+	int op, unsigned int arg1, unsigned int arg2)
+{
+	int ret;
+	unsigned long ign1, ign2, ign3;
+
+	__asm__ __volatile__ (
+	       TRAP_INSTR
+	       : "=a"(ret), "=b"(ign1), "=c"(ign2), "=d"(ign3)
+	       : "0"(__HYPERVISOR_pmc_op), "1"(op), "2"(arg1), "3"(arg2)
+	       : "memory" );
+
+	return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h xen-unstable.hg-20050823/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h	2005-08-24 07:44:51.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h	2005-08-23 07:05:17.000000000 -0500
@@ -519,4 +519,19 @@ HYPERVISOR_vcpu_pickle(
     return ret;
 }
 
+static inline int
+HYPERVISOR_pmc_op(
+	int op, u64 arg1, u64 arg2)
+{
+	int ret;
+
+	__asm__ __volatile__ (
+		TRAP_INSTR
+		: "=a"(ret)
+		: "0"(__HYPERVISOR_pmc_op), "D"(op), "S"(arg1), "d"(arg2)
+		: __syscall_clobber );
+
+	return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -Naurp xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/linux/oprofile.h xen-unstable.hg-20050823/linux-2.6.12-xen0/include/linux/oprofile.h
--- xen-unstable.hg-20050823-nooprofile/linux-2.6.12-xen0/include/linux/oprofile.h	2005-06-17 14:48:29.000000000 -0500
+++ xen-unstable.hg-20050823/linux-2.6.12-xen0/include/linux/oprofile.h	2005-08-23 07:05:17.000000000 -0500
@@ -8,6 +8,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef OPROFILE_H
@@ -27,6 +31,10 @@ struct oprofile_operations {
 	/* create any necessary configuration files in the oprofile fs.
 	 * Optional. */
 	int (*create_files)(struct super_block * sb, struct dentry * root);
+	/* setup active domains with Xen */
+	int (*set_active)(int *active_domains, unsigned int adomains);
+	/* setup passive domains with Xen */
+	int (*set_passive)(int *passive_domains, unsigned int pdomains);
 	/* Do any necessary interrupt setup. Optional. */
 	int (*setup)(void);
 	/* Do any necessary interrupt shutdown. Optional. */
@@ -61,6 +69,15 @@ void oprofile_arch_exit(void);
  */
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
 
+/**
+ * Alternative function to add a sample for Xen.
+ * It would be better to combine both functions into one, but this would
+ * require passing the cpu_mode parameter (formerly is_kernel) back to
+ * oprofile_add_sample() (Xen is the best place to determine cpu_mode).
+ */
+extern void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode, 
+	unsigned long event);
+
 /* Use this instead when the PC value is not from the regs. Doesn't
  * backtrace. */
 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);

[-- Attachment #3: xenoprof-1.2-x86_64-xen.patch2 --]
[-- Type: text/x-diff, Size: 68332 bytes --]

diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/Makefile xen-unstable.hg-20050823/xen/arch/x86/Makefile
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/Makefile	2005-08-24 07:44:52.000000000 -0500
+++ xen-unstable.hg-20050823/xen/arch/x86/Makefile	2005-08-23 07:05:17.000000000 -0500
@@ -33,7 +33,10 @@ ifneq ($(crash_debug),y)
 OBJS := $(patsubst cdb%.o,,$(OBJS))
 endif
 
+OBJS += oprofile/oprofile.o
+
 default: $(TARGET)
+	$(MAKE) -C oprofile
 
 $(TARGET): $(TARGET)-syms boot/mkelf32
 	./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000
@@ -60,6 +63,9 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off
 boot/mkelf32: boot/mkelf32.c
 	$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
 
+oprofile/oprofile.o:
+	$(MAKE) -C oprofile
+
 clean:
 	rm -f *.o *.s *~ core boot/*.o boot/*~ boot/core boot/mkelf32
 	rm -f x86_32/*.o x86_32/*~ x86_32/core
@@ -68,5 +74,6 @@ clean:
 	rm -f acpi/*.o acpi/*~ acpi/core
 	rm -f genapic/*.o genapic/*~ genapic/core
 	rm -f cpu/*.o cpu/*~ cpu/core
+	rm -f oprofile/*.o
 
 .PHONY: default clean
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/nmi.c xen-unstable.hg-20050823/xen/arch/x86/nmi.c
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/nmi.c	2005-08-24 07:44:52.000000000 -0500
+++ xen-unstable.hg-20050823/xen/arch/x86/nmi.c	2005-08-23 07:05:17.000000000 -0500
@@ -5,6 +5,10 @@
  *
  *  Started by Ingo Molnar <mingo@redhat.com>
  *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  *  Fixes:
  *  Mikael Pettersson	: AMD K7 support for local APIC NMI watchdog.
  *  Mikael Pettersson	: Power Management for local APIC NMI watchdog.
@@ -35,6 +39,28 @@ static unsigned int nmi_p4_cccr_val;
 static struct ac_timer nmi_timer[NR_CPUS];
 static unsigned int nmi_timer_ticks[NR_CPUS];
 
+/*
+ * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
+ * - it may be reserved by some other driver, or not
+ * - when not reserved by some other driver, it may be used for
+ *   the NMI watchdog, or not
+ *
+ * This is maintained separately from nmi_active because the NMI
+ * watchdog may also be driven from the I/O APIC timer.
+ */
+static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int lapic_nmi_owner;
+#define LAPIC_NMI_WATCHDOG      (1<<0)
+#define LAPIC_NMI_RESERVED      (1<<1)
+                                                                                                             
+/* nmi_active:
+ * +1: the lapic NMI watchdog is active, but can be disabled
+ *  0: the lapic NMI watchdog has not been set up, and cannot
+ *     be enabled
+ * -1: the lapic NMI watchdog is disabled, but can be enabled
+ */
+int nmi_active;
+
 #define K7_EVNTSEL_ENABLE	(1 << 22)
 #define K7_EVNTSEL_INT		(1 << 20)
 #define K7_EVNTSEL_OS		(1 << 17)
@@ -66,8 +92,6 @@ static unsigned int nmi_timer_ticks[NR_C
  * max threshold. [IA32-Vol3, Section 14.9.9] 
  */
 #define MSR_P4_IQ_COUNTER0	0x30C
-#define MSR_P4_IQ_CCCR0		0x36C
-#define MSR_P4_CRU_ESCR0	0x3B8 /* ESCR no. 4 */
 #define P4_NMI_CRU_ESCR0	P4_ESCR_EVENT_SELECT(0x3F)
 #define P4_NMI_IQ_CCCR0	\
     (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
@@ -124,6 +148,70 @@ static inline void nmi_pm_init(void) { }
  * Original code written by Keith Owens.
  */
 
+static void disable_lapic_nmi_watchdog(void)
+{
+        if (nmi_active <= 0)
+                return;
+        switch (boot_cpu_data.x86_vendor) {
+        case X86_VENDOR_AMD:
+                wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+                break;
+        case X86_VENDOR_INTEL:
+                switch (boot_cpu_data.x86) {
+                case 6:
+                        wrmsr(MSR_P6_EVNTSEL0, 0, 0);
+                        break;
+                case 15:
+			if ( (smp_num_siblings <= 1) ||
+			     ( (smp_processor_id() % smp_num_siblings) == 0) )
+			{
+                        	wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
+	                        wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+			} else {
+				wrmsr(MSR_P4_IQ_CCCR1, 0, 0);	
+			}
+                        break;
+                }
+                break;
+        }
+        nmi_active = -1;
+        /* tell do_nmi() and others that we're not active any more */
+        nmi_watchdog = 0;
+}
+
+static void enable_lapic_nmi_watchdog(void)
+{
+        if (nmi_active < 0) {
+                nmi_watchdog = NMI_LOCAL_APIC;
+                setup_apic_nmi_watchdog();
+        }
+}
+
+int reserve_lapic_nmi(void)
+{
+        unsigned int old_owner;
+        spin_lock(&lapic_nmi_owner_lock);
+        old_owner = lapic_nmi_owner;
+        lapic_nmi_owner |= LAPIC_NMI_RESERVED;
+        spin_unlock(&lapic_nmi_owner_lock);
+        if (old_owner & LAPIC_NMI_RESERVED)
+                return -EBUSY;
+        if (old_owner & LAPIC_NMI_WATCHDOG)
+                disable_lapic_nmi_watchdog();
+        return 0;
+}
+
+void release_lapic_nmi(void)
+{
+        unsigned int new_owner;
+        spin_lock(&lapic_nmi_owner_lock);
+        new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
+        lapic_nmi_owner = new_owner;
+        spin_unlock(&lapic_nmi_owner_lock);
+        if (new_owner & LAPIC_NMI_WATCHDOG)
+                enable_lapic_nmi_watchdog();
+}
+
 static void __pminit clear_msr_range(unsigned int base, unsigned int n)
 {
     unsigned int i;
@@ -241,6 +329,9 @@ void __pminit setup_apic_nmi_watchdog(vo
 
     init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
 
+    lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
+    nmi_active = 1;
+
     nmi_pm_init();
 }
 
@@ -337,3 +428,7 @@ void nmi_watchdog_tick(struct cpu_user_r
         wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
     }
 }
+
+EXPORT_SYMBOL(reserve_lapic_nmi);
+EXPORT_SYMBOL(release_lapic_nmi);
+
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/Makefile xen-unstable.hg-20050823/xen/arch/x86/oprofile/Makefile
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/Makefile	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/Makefile	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,9 @@
+
+include $(BASEDIR)/Rules.mk
+                                     
+default: $(OBJS) 
+	$(LD) $(LDFLAGS) -r -o oprofile.o $(OBJS)
+
+%.o: %.c $(HDRS) Makefile
+	$(CC) $(CFLAGS) -c $< -o $@
+
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/nmi_int.c xen-unstable.hg-20050823/xen/arch/x86/oprofile/nmi_int.c
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/nmi_int.c	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/nmi_int.c	2005-08-24 05:20:33.000000000 -0500
@@ -0,0 +1,453 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/event.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <public/xen.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+#include <xen/delay.h>
+ 
+#include "op_counter.h"
+#include "op_x86_model.h"
+ 
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+
+#define VIRQ_BITMASK_SIZE	(MAX_OPROF_DOMAINS/32 + 1)
+
+extern int active_domains[MAX_OPROF_DOMAINS];
+extern unsigned int adomains;
+
+extern struct domain * primary_profiler;
+extern struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+extern int is_active(struct domain *d);
+extern int active_id(struct domain *d);
+extern int is_passive(struct domain *d);
+extern int is_profiled(struct domain *d);
+
+
+int nmi_profiling_started = 0;
+
+int active_virq_count = 0;
+int passive_virq_count = 0;
+int other_virq_count = 0;
+int other_id = -1;
+int xen_count = 0;
+int dom_count = 0; 
+int ovf = 0;
+
+int nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+	int xen_mode = 0;
+
+	ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
+	xen_mode = RING_0(regs);
+	if (ovf) {
+		if (xen_mode)
+			xen_count++;
+		else
+			dom_count++;
+
+		if (is_active(current->domain)) {
+		/* This is slightly incorrect. If we do not deliver 
+			OVF virtual interrupts in a synchronous 
+			manner, a process switch may happen in the domain 
+			between the point the sample was collected and 
+			the point at which a VIRQ was delivered. However, 
+			it is not safe to call send_guest_virq from this 
+			NMI context, it may lead to a deadlock since NMIs are 
+			unmaskable. One optimization that we can do is 
+			that if the sample occurs while domain code is 
+			running, we know that it is safe to call 
+			send_guest_virq, since we know no Xen code 
+			is running at that time.
+			However, this may distort the sample distribution,
+			because we may lose more Xen mode samples.*/
+			active_virq_count++;
+			if (!xen_mode) {
+				send_guest_virq(current, VIRQ_PMC_OVF);
+				clear_bit(active_id(current->domain), &virq_ovf_pending[0]);
+			} else 
+				set_bit(active_id(current->domain), &virq_ovf_pending[0]);
+			primary_profiler->shared_info->active_samples++;
+		}
+		else if (is_passive(current->domain)) {
+			set_bit(active_id(primary_profiler), &virq_ovf_pending[0]);
+			passive_virq_count++;
+			primary_profiler->shared_info->passive_samples++;
+		}
+		else {
+			other_virq_count++;
+			other_id = current->domain->domain_id;
+			primary_profiler->shared_info->other_samples++;
+		}
+	}
+	return 1;
+}
+
+static void free_msrs(void)
+{
+	int i;
+	for (i = 0; i < NR_CPUS; ++i) {
+		xfree(cpu_msrs[i].counters);
+		cpu_msrs[i].counters = NULL;
+		xfree(cpu_msrs[i].controls);
+		cpu_msrs[i].controls = NULL;
+	}
+}
+ 
+static int allocate_msrs(void)
+{
+	int success = 1;
+	size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+	size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+	int i;
+	for (i = 0; i < NR_CPUS; ++i) {
+		//if (!cpu_online(i))
+		if (!test_bit(i, &cpu_online_map))
+			continue;
+
+		cpu_msrs[i].counters = xmalloc_bytes(counters_size);
+		if (!cpu_msrs[i].counters) {
+			success = 0;
+			break;
+		}
+		cpu_msrs[i].controls = xmalloc_bytes(controls_size);
+		if (!cpu_msrs[i].controls) {
+			success = 0;
+			break;
+		}
+	}
+	if (!success)
+		free_msrs();
+
+	return success;
+}
+
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrs; ++i) {
+		rdmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrls; ++i) {
+		rdmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+}
+
+static void nmi_save_registers(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	model->fill_in_addresses(msrs);
+	nmi_cpu_save_registers(msrs);
+}
+
+int nmi_reserve_counters(void)
+{
+	if (!allocate_msrs())	/* per-CPU MSR tables; freed again if the reservation below fails */
+		return -ENOMEM;
+
+	/* Take over the local APIC NMI before touching the counters.
+	 * We need to be careful to install our NMI handler
+	 * without actually triggering any NMIs as this will
+	 * break the core code horrifically.
+	 */
+	/* Open question: does this need to be done on all CPUs? */
+	if (reserve_lapic_nmi() < 0) {
+		free_msrs();
+		return -EBUSY;
+	}
+	/* Save and setup must be serialized for HT because the subsets
+	 * of MSRs touched by the save and setup operations are distinct.
+	 */
+	on_each_cpu(nmi_save_registers, NULL, 0, 1);
+	return 0;
+}
+
+static void nmi_cpu_setup(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	model->setup_ctrs(msrs);
+}
+
+int nmi_setup_events(void)
+{
+	on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+	return 0;
+}
+
+int nmi_enable_virq(void)	/* (void) gives a real prototype, like nmi_disable_virq(void) */
+{
+	set_nmi_callback(nmi_callback);	/* install nmi_callback() as the NMI callback */
+	return 0;	/* always succeeds */
+}
+
+static void nmi_cpu_start(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs const * msrs = &cpu_msrs[cpu];
+	saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+	apic_write(APIC_LVTPC, APIC_DM_NMI);
+	model->start(msrs);
+}
+
+int nmi_start(void)
+{
+	on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+	nmi_profiling_started = 1;
+	return 0;
+}
+
+static void nmi_cpu_stop(void * dummy)	/* per-CPU part of nmi_stop(), run via on_each_cpu() */
+{
+	unsigned int v;
+	int cpu = smp_processor_id();
+	struct op_msrs const * msrs = &cpu_msrs[cpu];
+	model->stop(msrs);
+
+	/* restoring APIC_LVTPC can trigger an apic error because the delivery
+	 * mode and vector nr combination can be illegal. That's by design: on
+	 * power on apic lvt contain a zero vector nr which are legal only for
+	 * NMI delivery mode. So inhibit apic err before restoring lvtpc
+	 */
+	if (!(apic_read(APIC_LVTPC) & APIC_DM_NMI)
+		|| (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)) {
+		printk("nmi_stop: APIC not good %lu\n", (unsigned long)apic_read(APIC_LVTPC));	/* was "%ul": value plus a stray 'l' */
+		mdelay(5000);	/* NOTE(review): 5 s busy-wait looks like a debugging aid - confirm it is wanted */
+	}
+	v = apic_read(APIC_LVTERR);
+	apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);	/* mask APIC errors around the restore */
+	apic_write(APIC_LVTPC, saved_lvtpc[cpu]);	/* value saved by nmi_cpu_start() */
+	apic_write(APIC_LVTERR, v);	/* put the error LVT back as it was */
+}
+ 
+void nmi_stop(void)
+{
+	nmi_profiling_started = 0;
+	on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+	active_virq_count = 0;
+	passive_virq_count = 0;
+	other_virq_count = 0;
+	xen_count = 0;
+	dom_count = 0;
+}
+
+extern unsigned int read_ctr(struct op_msrs const * const msrs, int ctr);
+
+void nmi_sanity_check(struct cpu_user_regs *regs, int cpu)
+{
+	int i;
+	int masked = 0;
+
+	/* We may have missed some NMI interrupts if we were already 
+		in an NMI context at that time. If this happens, then 
+		the counters are not reset and in the case of P4, the 
+		APIC LVT disable mask is set. In both cases we end up 
+		losing samples. On P4, this condition can be detected 
+		by checking the APIC LVT mask. But in P6, we need to 
+		examine the counters for overflow. So, every timer 
+		interrupt, we check that everything is OK */
+
+	if (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)
+		masked = 1;
+
+	nmi_callback(regs, cpu);
+
+	if (ovf && masked) {
+		if (is_active(current->domain))
+			current->domain->shared_info->nmi_restarts++;
+		else if (is_passive(current->domain))
+			primary_profiler->shared_info->nmi_restarts++;
+	}
+
+	/*if (jiffies %1000 == 0) {	
+		printk("cpu %d: sample count %d %d %d at %u\n", cpu, active_virq_count, passive_virq_count, other_virq_count, jiffies);
+		printk("other task id %d\n", other_id);
+		printk("%d in xen, %d in domain\n", xen_count, dom_count);
+		printk("counters %p %p\n", read_ctr(&cpu_msrs[cpu], 0), read_ctr(&cpu_msrs[cpu], 1));
+	}*/
+	
+
+	for (i = 0; i < adomains; i++)
+		if (test_and_clear_bit(i, &virq_ovf_pending[0])) {
+		  /* For now we do not support profiling of SMP guests */
+                  /* virq is delivered to first VCPU */  
+		  send_guest_virq(adomain_ptrs[i]->vcpu[0], VIRQ_PMC_OVF);
+		}
+}
+
+void nmi_disable_virq(void)
+{
+	unset_nmi_callback();
+} 
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+	unsigned int const nr_ctrs = model->num_counters;
+	unsigned int const nr_ctrls = model->num_controls; 
+	struct op_msr * counters = msrs->counters;
+	struct op_msr * controls = msrs->controls;
+	unsigned int i;
+
+	for (i = 0; i < nr_ctrls; ++i) {
+		wrmsr(controls[i].addr,
+			controls[i].saved.low,
+			controls[i].saved.high);
+	}
+ 
+	for (i = 0; i < nr_ctrs; ++i) {
+		wrmsr(counters[i].addr,
+			counters[i].saved.low,
+			counters[i].saved.high);
+	}
+}
+ 
+static void nmi_cpu_shutdown(void * dummy)
+{
+	int cpu = smp_processor_id();
+	struct op_msrs * msrs = &cpu_msrs[cpu];
+	nmi_restore_registers(msrs);
+}
+ 
+void nmi_release_counters(void)
+{
+	on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+	release_lapic_nmi();
+	free_msrs();
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int p4_init(void)	/* not __init: nmi_init() calls this on behalf of a domain, presumably after boot */
+{
+	__u8 cpu_model = current_cpu_data.x86_model;
+	printk("cpu model: %d\n", cpu_model);
+	if (cpu_model > 4)	/* only models 0..4 are accepted; returns 0 (unsupported) otherwise */
+		return 0;
+
+#ifndef CONFIG_SMP
+	printk("model is op_p4_spec (uniprocessor)\n");
+	model = &op_p4_spec;
+	return 1;
+#else
+	/* Announce and select the HT-aware spec only when the CPU reports HT. */
+	if (cpu_has_ht)
+	{
+	  printk("model is op_p4_ht2_spec (SMP)\n");
+	  model = &op_p4_ht2_spec;
+	  return 1;
+	}
+	else
+	{
+	  printk("model is op_p4_spec (SMP)\n");
+	  model = &op_p4_spec;
+	  return 1;
+	}
+#endif
+	return 0;
+}
+
+
+static int ppro_init(void)	/* not __init: nmi_init() calls this on behalf of a domain, presumably after boot */
+{
+	__u8 cpu_model = current_cpu_data.x86_model;
+
+	if (cpu_model > 0xd)	/* models above 0xd are rejected: return 0 (unsupported) */
+		return 0;
+
+	model = &op_ppro_spec;	/* select the P6 counter model */
+	return 1;
+}
+
+/* Select the counter model; store model->num_counters in *num_events and the
+ * primary flag in *is_primary.  Returns 0, -ENODEV or -EFAULT. */
+int nmi_init(int *num_events, int *is_primary)
+{
+	__u8 vendor = current_cpu_data.x86_vendor;
+	__u8 family = current_cpu_data.x86;
+	int prim = 0;
+ 
+	if (!cpu_has_apic) {
+		printk("(XEN) cpu has no APIC\n");
+		return -ENODEV;
+	}
+
+	if (primary_profiler == NULL) {
+		primary_profiler = current->domain;
+		prim = 1;
+	}
+
+	if (primary_profiler != current->domain)
+		goto out;
+
+	printk("cpu vendor: %d\n", vendor);
+	printk("cpu family: %d\n", family);
+
+	switch (vendor) {
+		case X86_VENDOR_INTEL:
+			switch (family) {
+				/* Pentium IV */
+				case 0xf:
+					if (!p4_init())
+						return -ENODEV;
+					break;
+				/* A P6-class processor */
+				case 6:
+					if (!ppro_init())
+						return -ENODEV;
+					break;
+				default:
+					return -ENODEV;
+			}
+			break;	/* without this the AMD case below overwrote the Intel model */
+		case X86_VENDOR_AMD:
+			if (family != 6 && family != 0xf)	/* only these families use op_athlon_spec */
+				return -ENODEV;
+			model = &op_athlon_spec;
+			break;
+		default:
+			return -ENODEV;
+	}
+out:
+	if (model == NULL)	/* a non-primary caller can get here before any model was selected */
+		return -ENODEV;
+	if (copy_to_user((void *)num_events, (void *)&model->num_counters, sizeof(int)))
+		return -EFAULT;
+	if (copy_to_user((void *)is_primary, (void *)&prim, sizeof(int)))
+		return -EFAULT;
+
+	return 0;
+}
+
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_counter.h xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_counter.h
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_counter.h	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_counter.h	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,33 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_model_athlon.c xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_model_athlon.c
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_model_athlon.c	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_model_athlon.c	2005-08-24 04:34:41.000000000 -0500
@@ -0,0 +1,174 @@
+/**
+ * @file op_model_athlon.c
+ * athlon / K7 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ */
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+
+
+// #include <linux/oprofile.h>
+// #include <asm/ptrace.h>
+// #include <asm/msr.h>
+
+#define NUM_COUNTERS 4
+#define NUM_CONTROLS 4
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+ 
+static void athlon_fill_in_addresses(struct op_msrs * const msrs)
+{
+	/* Record each K7 counter MSR next to its event-select MSR. */
+	msrs->counters[0].addr = MSR_K7_PERFCTR0;
+	msrs->controls[0].addr = MSR_K7_EVNTSEL0;
+	msrs->counters[1].addr = MSR_K7_PERFCTR1;
+	msrs->controls[1].addr = MSR_K7_EVNTSEL1;
+	msrs->counters[2].addr = MSR_K7_PERFCTR2;
+	msrs->controls[2].addr = MSR_K7_EVNTSEL2;
+	msrs->counters[3].addr = MSR_K7_PERFCTR3;
+	msrs->controls[3].addr = MSR_K7_EVNTSEL3;
+}
+
+ 
+static void athlon_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int lo, hi;
+	int idx;
+
+	/* wipe the event-select registers: CTRL_CLEAR keeps only bit 21 */
+	for (idx = 0; idx < NUM_CONTROLS; ++idx) {
+		CTRL_READ(lo, hi, msrs, idx);
+		CTRL_CLEAR(lo);
+		CTRL_WRITE(lo, hi, msrs, idx);
+	}
+
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (idx = 0; idx < NUM_COUNTERS; ++idx) {
+		CTR_WRITE(1, msrs, idx);
+	}
+
+	/* program the counters that are switched on, zero the rest */
+	for (idx = 0; idx < NUM_COUNTERS; ++idx) {
+		if (!counter_config[idx].enabled) {
+			reset_value[idx] = 0;
+			continue;
+		}
+
+		reset_value[idx] = counter_config[idx].count;
+		CTR_WRITE(counter_config[idx].count, msrs, idx);
+
+		CTRL_READ(lo, hi, msrs, idx);
+		CTRL_CLEAR(lo);
+		CTRL_SET_ENABLE(lo);
+		CTRL_SET_USR(lo, counter_config[idx].user);
+		CTRL_SET_KERN(lo, counter_config[idx].kernel);
+		CTRL_SET_UM(lo, counter_config[idx].unit_mask);
+		CTRL_SET_EVENT(lo, counter_config[idx].event);
+		CTRL_WRITE(lo, hi, msrs, idx);
+	}
+}
+
+ 
+/* Log a sample for every K7 counter whose bit 31 has cleared and reload
+ * it with its reset value.  Takes cpu_user_regs where the oprofile
+ * original took pt_regs; the cpu argument is not used. */
+static int athlon_check_ctrs(unsigned int const cpu,
+                          struct op_msrs const * const msrs,
+                          struct cpu_user_regs * const regs)
+{
+	struct vcpu *v = current;
+	u64 eip = regs->eip;
+	unsigned int lo, hi;
+	int mode = 0;
+	int ctr;
+
+	/* mode: 1 if KERNEL_MODE(), else 2 if RING_0(), else 0 */
+	if (KERNEL_MODE(v, regs))
+		mode = 1;
+	else if (RING_0(regs))
+		mode = 2;
+
+	for (ctr = 0; ctr < NUM_COUNTERS; ++ctr) {
+		CTR_READ(lo, hi, msrs, ctr);
+		if (!CTR_OVERFLOWED(lo))
+			continue;
+		pmc_log_event(current->domain, eip, mode, ctr);
+		CTR_WRITE(reset_value[ctr], msrs, ctr);
+	}
+
+	/* See op_model_ppro.c */
+	return 1;
+}
+
+ 
+static void athlon_start(struct op_msrs const * const msrs)
+{
+	unsigned int lo, hi;
+	int ctr;
+	for (ctr = 0; ctr < NUM_COUNTERS; ++ctr) {
+		if (!reset_value[ctr])	/* reset value 0: leave it off */
+			continue;
+		CTRL_READ(lo, hi, msrs, ctr);
+		CTRL_SET_ACTIVE(lo);	/* sets bit 22 */
+		CTRL_WRITE(lo, hi, msrs, ctr);
+	}
+}
+
+
+static void athlon_stop(struct op_msrs const * const msrs)
+{
+	unsigned int lo, hi;
+	int ctr;
+
+	/* Subtle: bit 22 is cleared on every counter, armed or not,
+	 * to avoid a race with setting our pm callback */
+	for (ctr = 0; ctr < NUM_COUNTERS; ++ctr) {
+		CTRL_READ(lo, hi, msrs, ctr);
+		CTRL_SET_INACTIVE(lo);
+		CTRL_WRITE(lo, hi, msrs, ctr);
+	}
+}
+
+
+struct op_x86_model_spec const op_athlon_spec = {
+	.num_counters      = NUM_COUNTERS,	/* four K7 counters */
+	.num_controls      = NUM_CONTROLS,	/* four event selects */
+	.fill_in_addresses = athlon_fill_in_addresses,
+	.setup_ctrs        = athlon_setup_ctrs,
+	.check_ctrs        = athlon_check_ctrs,
+	.start             = athlon_start,
+	.stop              = athlon_stop,
+};
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_model_p4.c xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_model_p4.c
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_model_p4.c	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_model_p4.c	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,748 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/* Pick the HT counter count at run time: whether the chip is
+   hyper-threaded is only discovered at boot, not at build time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+	if (!cpu_has_ht)
+		return;
+	num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+static inline int addr_increment(void)
+{
+#ifdef CONFIG_SMP
+	return cpu_has_ht ? 2 : 1;	/* with HT, step over every other MSR */
+#else
+	return 1;			/* no SMP: visit every MSR */
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+	int virt_counter;
+	int counter_address;
+	int cccr_address;
+};
+
+struct p4_event_binding {
+	int escr_select;  /* value to put in CCCR */
+	int event_select; /* value to put in ESCR */
+	struct {
+		int virt_counter; /* for this counter... */
+		int escr_address; /* use this ESCR       */
+	} bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+   event/i386.p4*events. */
+
+
+#define CTR_BPU_0      (1 << 0)
+#define CTR_MS_0       (1 << 1)
+#define CTR_FLAME_0    (1 << 2)
+#define CTR_IQ_4       (1 << 3)
+#define CTR_BPU_2      (1 << 4)
+#define CTR_MS_2       (1 << 5)
+#define CTR_FLAME_2    (1 << 6)
+#define CTR_IQ_5       (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
+	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
+	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
+	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
+	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
+	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS	(NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)	/* 18 - 8 */
+
+/* All cccr we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+	MSR_P4_BPU_CCCR1,	MSR_P4_BPU_CCCR3,
+	MSR_P4_MS_CCCR1,	MSR_P4_MS_CCCR3,
+	MSR_P4_FLAME_CCCR1,	MSR_P4_FLAME_CCCR3,
+	MSR_P4_IQ_CCCR0,	MSR_P4_IQ_CCCR1,
+	MSR_P4_IQ_CCCR2,	MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+	
+	{ /* BRANCH_RETIRED */
+		0x05, 0x06, 
+		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+	
+	{ /* MISPRED_BRANCH_RETIRED */
+		0x04, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+	
+	{ /* TC_DELIVER_MODE */
+		0x01, 0x01,
+		{ { CTR_MS_0, MSR_P4_TC_ESCR0},  
+		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
+	},
+	
+	{ /* BPU_FETCH_REQUEST */
+		0x00, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+	},
+
+	{ /* ITLB_REFERENCE */
+		0x03, 0x18,
+		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+	},
+
+	{ /* MEMORY_CANCEL */
+		0x05, 0x02,
+		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+	},
+
+	{ /* MEMORY_COMPLETE */
+		0x02, 0x08,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* LOAD_PORT_REPLAY */
+		0x02, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* STORE_PORT_REPLAY */
+		0x02, 0x05,
+		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+	},
+
+	{ /* MOB_LOAD_REPLAY */
+		0x02, 0x03,
+		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+	},
+
+	{ /* PAGE_WALK_TYPE */
+		0x04, 0x01,
+		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+	},
+
+	{ /* BSQ_CACHE_REFERENCE */
+		0x07, 0x0c, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+	},
+
+	{ /* IOQ_ALLOCATION */
+		0x06, 0x03, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* IOQ_ACTIVE_ENTRIES */
+		0x06, 0x1a, 
+		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+		  { 0, 0 } }
+	},
+
+	{ /* FSB_DATA_ACTIVITY */
+		0x06, 0x17, 
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+
+	{ /* BSQ_ALLOCATION */
+		0x07, 0x05, 
+		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+		  { 0, 0 } }
+	},
+
+	{ /* BSQ_ACTIVE_ENTRIES */
+		0x07, 0x06,
+		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+		  { 0, 0 } }
+	},
+
+	{ /* X87_ASSIST */
+		0x05, 0x03, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* SSE_INPUT_ASSIST */
+		0x01, 0x34,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_SP_UOP */
+		0x01, 0x08, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* PACKED_DP_UOP */
+		0x01, 0x0c, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_SP_UOP */
+		0x01, 0x0a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* SCALAR_DP_UOP */
+		0x01, 0x0e,
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* 64BIT_MMX_UOP */
+		0x01, 0x02, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* 128BIT_MMX_UOP */
+		0x01, 0x1a, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+
+	{ /* X87_FP_UOP */
+		0x01, 0x04, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* X87_SIMD_MOVES_UOP */
+		0x01, 0x2e, 
+		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+	},
+  
+	{ /* MACHINE_CLEAR */
+		0x05, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* GLOBAL_POWER_EVENTS */
+		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+	},
+  
+	{ /* TC_MS_XFER */
+		0x00, 0x05, 
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* UOP_QUEUE_WRITES */
+		0x00, 0x09,
+		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
+		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
+	},
+
+	{ /* FRONT_END_EVENT */
+		0x05, 0x08,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* EXECUTION_EVENT */
+		0x05, 0x0c,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* REPLAY_EVENT */
+		0x05, 0x09,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+	},
+
+	{ /* INSTR_RETIRED */
+		0x04, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOPS_RETIRED */
+		0x04, 0x01,
+		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+	},
+
+	{ /* UOP_TYPE */    
+		0x02, 0x02, 
+		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+	},
+
+	{ /* RETIRED_MISPRED_BRANCH_TYPE */
+		0x02, 0x05, 
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	},
+
+	{ /* RETIRED_BRANCH_TYPE */
+		0x02, 0x04,
+		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+	}
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/* Returns the "stagger" of the current CPU: an extra array offset used
+   throughout this module to select the "even" or "odd" part of all the
+   divided resources. */
+static unsigned int get_stagger(void)
+{
+	/*
+	 * The sibling-map lookup is disabled because it is wrong:
+	 *   int cpu = smp_processor_id();
+	 *   return (cpu != first_cpu(cpu_sibling_map[cpu]));
+	 * We want the two logical cpus of a physical cpu to use disjoint
+	 * sets of counters; for now every CPU gets stagger 0.
+	 */
+	return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+   by passing a "virtual" counter number to this macro,
+   along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+	unsigned int i; 
+	unsigned int addr, stag;
+	/* Records in msrs the counter, CCCR and ESCR MSR addresses for this CPU. */
+	setup_num_counters();
+	stag = get_stagger();
+
+	/* the counter registers we pay attention to */
+	for (i = 0; i < num_counters; ++i) {
+		msrs->counters[i].addr = 
+			p4_counters[VIRT_CTR(stag, i)].counter_address;
+	}
+
+	/* FIXME: bad feeling, we don't save the 10 counters we don't use. */
+
+	/* 18 CCCR registers; i keeps running across all the loops below */
+	for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+	     addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	/* 43 ESCR registers in three or four discontiguous groups */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+
+	/* no IQ_ESCR0/1 on some models, we save BSU_ESCR0/1 a second time
+	 * to avoid special case in nmi_{save|restore}_registers() */
+	if (boot_cpu_data.x86_model >= 0x3) {
+		for (addr = MSR_P4_BSU_ESCR0 + stag;
+		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	} else {
+		for (addr = MSR_P4_IQ_ESCR0 + stag;
+		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+			msrs->controls[i].addr = addr;
+		}
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+	
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+		msrs->controls[i].addr = addr;
+	}
+
+	/* there are 2 remaining non-contiguously located ESCRs */
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		/* standard non-HT CPUs handle both remaining ESCRs */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else if (stag == 0) {
+		/* HT CPUs give the first remainder to the even thread, as
+		   the 32nd control register */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+	} else {
+		/* and two copies of the second to the odd thread,
+		   for the 22nd and 23rd control registers */
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+		msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+	}
+}
+
+
+/* Program the ESCR/CCCR pair behind virtual counter ctr for the event
+ * chosen in counter_config[ctr].  Logs an error and writes no MSR when
+ * the event code is out of range or has no binding for this counter. */
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+	int i;
+	int const maxbind = 2;
+	unsigned int cccr = 0;
+	unsigned int escr = 0;
+	unsigned int high = 0;
+	unsigned int counter_bit;
+	struct p4_event_binding *ev = NULL;
+	unsigned int stag;
+
+	stag = get_stagger();
+	/* convert from counter *number* to counter *bit* */
+	counter_bit = 1 << VIRT_CTR(stag, ctr);
+
+	/* event codes are 1-based indices into p4_events[]; 0 is invalid */
+	if (counter_config[ctr].event == 0 || counter_config[ctr].event > NUM_EVENTS) {
+		printk(KERN_ERR 
+		       "oprofile: P4 event code 0x%lx out of range\n", 
+		       counter_config[ctr].event);
+		return;
+	}
+	ev = &(p4_events[counter_config[ctr].event - 1]);
+
+	for (i = 0; i < maxbind; i++) {
+		if (ev->bindings[i].virt_counter & counter_bit) {
+			/* modify ESCR */
+			ESCR_READ(escr, high, ev, i);
+			ESCR_CLEAR(escr);
+			if (stag == 0) {
+				ESCR_SET_USR_0(escr, counter_config[ctr].user);
+				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+			} else {
+				ESCR_SET_USR_1(escr, counter_config[ctr].user);
+				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+			}
+			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+			ESCR_WRITE(escr, high, ev, i);
+
+			/* modify CCCR */
+			CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+			CCCR_CLEAR(cccr);
+			CCCR_SET_REQUIRED_BITS(cccr);
+			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+			if (stag == 0) {
+				CCCR_SET_PMI_OVF_0(cccr);
+			} else {
+				CCCR_SET_PMI_OVF_1(cccr);
+			}
+			CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+			return;
+		}
+	}
+
+	printk(KERN_ERR 
+	       "oprofile: P4 event code 0x%lx no binding, stag %u ctr %u\n",
+	       counter_config[ctr].event, stag, ctr);
+}
+
+
+/* Reset the CCCRs and ESCRs, then program and arm every counter enabled
+ * in counter_config[].  Does nothing if MISC_ENABLE bit 7 is clear. */
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int i, addr, stag;
+	unsigned int low, high;
+
+	stag = get_stagger();
+
+	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+	if (! MISC_PMC_ENABLED_P(low)) {
+		printk(KERN_ERR "oprofile: P4 PMC not available\n");
+		return;
+	}
+
+	/* clear the cccrs we will use */
+	for (i = 0 ; i < num_counters ; i++) {
+		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+	}
+
+	/* clear cccrs outside our concern */
+	for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+		rdmsr(p4_unused_cccr[i], low, high);
+		CCCR_CLEAR(low);
+		CCCR_SET_REQUIRED_BITS(low);
+		wrmsr(p4_unused_cccr[i], low, high);
+	}
+
+	/* clear all escrs (including those outside our concern) */
+	for (addr = MSR_P4_BSU_ESCR0 + stag;
+	     addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+
+	/* On older models clear also MSR_P4_IQ_ESCR0/1 */
+	if (boot_cpu_data.x86_model < 0x3) {
+		wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+		wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+	}
+
+	for (addr = MSR_P4_RAT_ESCR0 + stag;
+	     addr <= MSR_P4_SSU_ESCR0; addr += addr_increment()) {
+		wrmsr(addr, 0, 0);
+	}
+	
+	for (addr = MSR_P4_MS_ESCR0 + stag;
+	     addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
+		wrmsr(addr, 0, 0);
+	}
+	
+	for (addr = MSR_P4_IX_ESCR0 + stag;
+	     addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
+		wrmsr(addr, 0, 0);
+	}
+
+	if (num_counters == NUM_COUNTERS_NON_HT) {		
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	} else if (stag == 0) {
+		wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+	} else {
+		wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+	}		
+	
+	/* setup all counters */
+	for (i = 0 ; i < num_counters ; ++i) {
+		if (counter_config[i].enabled) {
+			reset_value[i] = counter_config[i].count;
+			pmc_setup_one_p4_counter(i);
+			CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+		} else {
+			reset_value[i] = 0;
+		}
+	}
+}
+
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+static int p4_check_ctrs(unsigned int const cpu, 
+			  struct op_msrs const * const msrs,
+			  struct cpu_user_regs * const regs)
+{
+	unsigned long ctr, low, high, stag, real;
+	int i, ovf = 0;
+	u64 eip = regs->eip;
+	int mode = 0;
+	struct vcpu *v = current;
+	/* Logs a sample for, and re-arms, every armed counter that overflowed. */
+	/* mode: 1 if KERNEL_MODE(), else 2 if RING_0(), else 0 (was RING_1()) */
+	if (KERNEL_MODE(v, regs))
+		mode = 1;
+	else if (RING_0(regs))
+		mode = 2;
+
+	stag = get_stagger();
+
+	for (i = 0; i < num_counters; ++i) {
+		if (!reset_value[i]) 
+			continue;
+
+		/* 
+		 * there is some eccentricity in the hardware which
+		 * requires that we perform 2 extra corrections:
+		 *
+		 * - check both the CCCR:OVF flag for overflow and the
+		 *   counter high bit for un-flagged overflows.
+		 *
+		 * - write the counter back twice to ensure it gets
+		 *   updated properly.
+		 * 
+		 * the former seems to be related to extra NMIs happening
+		 * during the current NMI; the latter is reported as errata
+		 * N15 in intel doc 249199-029, pentium 4 specification
+		 * update, though their suggested work-around does not
+		 * appear to solve the problem.
+		 */
+		
+		real = VIRT_CTR(stag, i);
+
+		CCCR_READ(low, high, real);
+ 		CTR_READ(ctr, high, real);	/* NOTE(review): overwrites the CCCR's high word */
+		if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+			pmc_log_event(current->domain, eip, mode, i);
+			CTR_WRITE(reset_value[i], real);
+			CCCR_CLEAR_OVF(low);
+			CCCR_WRITE(low, high, real);
+			CTR_WRITE(reset_value[i], real);
+			ovf = 1;
+		}
+	}
+
+	/* P4 quirk: you have to re-unmask the apic vector */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* ovf is non-zero only if an overflow was found; see op_model_ppro.c */
+	return ovf;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+	unsigned int const stag = get_stagger();
+	unsigned int lo, hi;
+	int virt;
+
+	/* set CCCR bit 12 on each counter that has a reset value */
+	for (virt = 0; virt < num_counters; ++virt) {
+		if (reset_value[virt]) {
+			CCCR_READ(lo, hi, VIRT_CTR(stag, virt));
+			CCCR_SET_ENABLE(lo);
+			CCCR_WRITE(lo, hi, VIRT_CTR(stag, virt));
+		}
+	}
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+	unsigned int const stag = get_stagger();
+	unsigned int lo, hi;
+	int virt;
+
+	/* clear CCCR bit 12 on every counter, armed or not */
+	for (virt = 0; virt < num_counters; ++virt) {
+		CCCR_READ(lo, hi, VIRT_CTR(stag, virt));
+		CCCR_SET_DISABLE(lo);
+		CCCR_WRITE(lo, hi, VIRT_CTR(stag, virt));
+	}
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+	.num_counters      = NUM_COUNTERS_HT2,	/* 4 */
+	.num_controls      = NUM_CONTROLS_HT2,	/* 23 ESCRs + 9 CCCRs */
+	.fill_in_addresses = p4_fill_in_addresses,
+	.setup_ctrs        = p4_setup_ctrs,
+	.check_ctrs        = p4_check_ctrs,
+	.start             = p4_start,
+	.stop              = p4_stop,
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+	.num_counters      = NUM_COUNTERS_NON_HT,	/* 8 */
+	.num_controls      = NUM_CONTROLS_NON_HT,	/* 45 ESCRs + 18 CCCRs */
+	.fill_in_addresses = p4_fill_in_addresses,
+	.setup_ctrs        = p4_setup_ctrs,
+	.check_ctrs        = p4_check_ctrs,
+	.start             = p4_start,
+	.stop              = p4_stop,
+};
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_model_ppro.c xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_model_ppro.c
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_model_ppro.c	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_model_ppro.c	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,168 @@
+/**
+ * @file op_model_ppro.c
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+	/* Record each P6 counter MSR next to its event-select MSR. */
+	msrs->counters[0].addr = MSR_P6_PERFCTR0;
+	msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+	msrs->counters[1].addr = MSR_P6_PERFCTR1;
+	msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+	unsigned int lo, hi;
+	int idx;
+
+	/* wipe the event-select registers: CTRL_CLEAR keeps only bit 21 */
+	for (idx = 0; idx < NUM_CONTROLS; ++idx) {
+		CTRL_READ(lo, hi, msrs, idx);
+		CTRL_CLEAR(lo);
+		CTRL_WRITE(lo, hi, msrs, idx);
+	}
+
+	/* avoid a false detection of ctr overflows in NMI handler */
+	for (idx = 0; idx < NUM_COUNTERS; ++idx) {
+		CTR_WRITE(1, msrs, idx);
+	}
+
+	/* program the enabled counters; reset_value of the rest is untouched */
+	for (idx = 0; idx < NUM_COUNTERS; ++idx) {
+		if (!counter_config[idx].enabled)
+			continue;
+
+		reset_value[idx] = counter_config[idx].count;
+		CTR_WRITE(counter_config[idx].count, msrs, idx);
+
+		CTRL_READ(lo, hi, msrs, idx);
+		CTRL_CLEAR(lo);
+		CTRL_SET_ENABLE(lo);
+		CTRL_SET_USR(lo, counter_config[idx].user);
+		CTRL_SET_KERN(lo, counter_config[idx].kernel);
+		CTRL_SET_UM(lo, counter_config[idx].unit_mask);
+		CTRL_SET_EVENT(lo, counter_config[idx].event);
+		CTRL_WRITE(lo, hi, msrs, idx);
+	}
+}
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+static int ppro_check_ctrs(unsigned int const cpu, 
+			    struct op_msrs const * const msrs,
+			    struct cpu_user_regs * const regs)
+{
+	unsigned int low, high;
+	int i, ovf = 0, mode = 0;
+	u64 eip = regs->eip;
+	struct vcpu *v = current;
+
+	/* Classify with KERNEL_MODE() exactly as the athlon and P4 models
+	 * do; the x86_64 port uses it in place of the RING_1() test. */
+	if (KERNEL_MODE(v, regs))
+		mode = 1;
+	else if (RING_0(regs))
+		mode = 2;
+
+	for (i = 0 ; i < NUM_COUNTERS; ++i) {
+		CTR_READ(low, high, msrs, i);
+		if (CTR_OVERFLOWED(low)) {
+			pmc_log_event(current->domain, eip, mode, i);
+			CTR_WRITE(reset_value[i], msrs, i);
+			ovf = 1;
+		}
+	}
+
+	/* Only P6 based Pentium M need to re-unmask the apic vector but it
+	 * doesn't hurt other P6 variant */
+	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+	/* We can't always tell whether we really handled an interrupt: a
+	 * *second* counter may be caught just after overflowing, so that
+	 * when its own interrupt arrives no overflowed counter is found.
+	 * This version reports only what it saw, so such a late NMI
+	 * returns 0 (athlon_check_ctrs always returns 1 instead). */
+	return ovf;
+}
+
+ 
+static void ppro_start(struct op_msrs const * const msrs)
+{
+	unsigned int lo, hi;	/* only control register 0 is touched */
+	CTRL_READ(lo, hi, msrs, 0);
+	CTRL_SET_ACTIVE(lo);	/* sets bit 22 */
+	CTRL_WRITE(lo, hi, msrs, 0);
+}
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+	unsigned int lo, hi;	/* only control register 0 is touched */
+	CTRL_READ(lo, hi, msrs, 0);
+	CTRL_SET_INACTIVE(lo);	/* clears bit 22 */
+	CTRL_WRITE(lo, hi, msrs, 0);
+}
+
+unsigned int read_ctr(struct op_msrs const * const msrs, int i)
+{
+	unsigned int lo, hi;	/* hi is read but discarded */
+	CTR_READ(lo, hi, msrs, i);
+	return lo;		/* low word of counter i */
+}
+
+struct op_x86_model_spec const op_ppro_spec = {
+	.num_counters      = NUM_COUNTERS,	/* two P6 counters */
+	.num_controls      = NUM_CONTROLS,	/* two event selects */
+	.fill_in_addresses = ppro_fill_in_addresses,
+	.setup_ctrs        = ppro_setup_ctrs,
+	.check_ctrs        = ppro_check_ctrs,
+	.start             = ppro_start,
+	.stop              = ppro_stop,
+};
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_x86_model.h xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_x86_model.h
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/op_x86_model.h	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/op_x86_model.h	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,55 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+struct op_saved_msr {
+	unsigned int high;	/* presumably the upper word of a saved MSR -- TODO confirm */
+	unsigned int low;	/* presumably the lower word of a saved MSR -- TODO confirm */
+};
+
+struct op_msr {
+	unsigned long addr;	/* MSR number, as passed to rdmsr()/wrmsr() */
+	struct op_saved_msr saved;	/* not touched by the model files */
+};
+
+struct op_msrs {
+	struct op_msr * counters;	/* filled by a model's fill_in_addresses() */
+	struct op_msr * controls;	/* filled by a model's fill_in_addresses() */
+};
+
+struct cpu_user_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU model's perfctr support.
+ */
+struct op_x86_model_spec {
+	unsigned int const num_counters;	/* counter MSRs the model drives */
+	unsigned int const num_controls;	/* control MSRs the model drives */
+	void (*fill_in_addresses)(struct op_msrs * const msrs);
+	void (*setup_ctrs)(struct op_msrs const * const msrs);
+	int (*check_ctrs)(unsigned int const cpu, 
+		struct op_msrs const * const msrs,
+		struct cpu_user_regs * const regs);
+	void (*start)(struct op_msrs const * const msrs);
+	void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/pmc.c xen-unstable.hg-20050823/xen/arch/x86/oprofile/pmc.c
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/oprofile/pmc.c	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/arch/x86/oprofile/pmc.c	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon, email: xenoprof@groups.hp.com
+ */
+
+#include <xen/sched.h>
+#include <asm/current.h>
+
+#include "op_counter.h"
+
+int active_domains[MAX_OPROF_DOMAINS];
+int passive_domains[MAX_OPROF_DOMAINS];
+unsigned int adomains = 0;
+unsigned int pdomains = 0;
+unsigned int activated = 0;
+
+#define VIRQ_BITMASK_SIZE	(MAX_OPROF_DOMAINS/32 + 1)
+
+struct domain * primary_profiler = NULL;
+struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+unsigned int virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+/* Return 1 if d's domain id is listed in active_domains[], else 0. */
+int is_active(struct domain *d)
+{
+	int idx, found = 0;
+	for (idx = 0; !found && idx < adomains; idx++)
+		found = (d->domain_id == active_domains[idx]);
+	return found;
+}
+
+/* Return d's index in adomain_ptrs[], or -1 if d is not stored there. */
+int active_id(struct domain *d)
+{
+	int idx, slot = -1;
+	for (idx = 0; slot < 0 && idx < adomains; idx++)
+		slot = (adomain_ptrs[idx] == d) ? idx : -1;
+	return slot;
+}
+
+/* Drop all active domains: zero adomains, clear virq_ovf_pending[], then
+ * put_domain() and NULL each entry that was counted before the reset. */
+void free_adomain_ptrs() 
+{
+	int idx, held = adomains;
+
+	adomains = 0;
+	for (idx = 0; idx < VIRQ_BITMASK_SIZE; idx++)
+		virq_ovf_pending[idx] = 0;
+	for (idx = 0; idx < held; idx++) {
+		put_domain(adomain_ptrs[idx]);
+		adomain_ptrs[idx] = NULL;
+	}
+}
+
+/* Resolve active_domains[0..num) into adomain_ptrs[]; 0 on success, -EFAULT on a bad count or id. */
+int set_adomain_ptrs(int num)
+{
+	int i;
+	if (num < 0 || num > MAX_OPROF_DOMAINS)	/* would index past the fixed-size tables */
+		return -EFAULT;
+	for (i = 0; i < VIRQ_BITMASK_SIZE; i++)
+		virq_ovf_pending[i] = 0;
+	for (i = 0; i < num; i++) {
+		struct domain *d = find_domain_by_id(active_domains[i]);
+		if (!d) {
+			free_adomain_ptrs();	/* put_domain() the entries stored so far */
+			return -EFAULT;
+		}
+		adomain_ptrs[i] = d;
+		adomains++;
+	}
+	return 0;
+}
+
+/* Add d to the active set (no-op if already listed); returns 0 or -EFAULT. */
+int set_active(struct domain *d)
+{
+	if (is_active(d))
+		return 0;
+	/* hack if we run out of space: the last entry is dropped to make room */
+	if (adomains >= MAX_OPROF_DOMAINS) {
+		adomains--;
+		put_domain(adomain_ptrs[adomains]);
+	}
+	active_domains[adomains] = d->domain_id;
+	if (!get_domain(d)) {
+		free_adomain_ptrs();	/* releases every active domain, not only d */
+		return -EFAULT;
+	}
+	adomain_ptrs[adomains++] = d;
+	return 0;
+}
+
+/* Return 1 if d's domain id is listed in passive_domains[], else 0. */
+int is_passive(struct domain *d)
+{
+	int idx, found = 0;
+	for (idx = 0; !found && idx < pdomains; idx++)
+		found = (d->domain_id == passive_domains[idx]);
+	return found;
+}
+
+/* Return 1 when d is profiled in either role (active or passive), else 0;
+ * is_passive() is only consulted when d is not active. */
+int is_profiled(struct domain *d)
+{
+	return (is_active(d) || is_passive(d)) ? 1 : 0;
+}
+
+void pmc_log_event(struct domain *d, u64 eip, int mode, int event) 
+{
+	shared_info_t *s = NULL;
+	struct domain *dest = d;
+	int head;
+	int tail;
+	/* Record one sample (eip, mode, event) attributed to domain d. */
+	if (!is_profiled(d))
+		return;	/* samples for unprofiled domains are dropped */
+
+	if (!is_passive(d)) {
+		s = dest->shared_info;	/* active domain: log into its own ring */
+		head = s->event_head;
+		tail = s->event_tail;
+		if ((head == tail - 1) || 
+		    (head == MAX_OPROF_EVENTS - 1 && tail == 0)) {
+			s->losing_samples = 1;	/* ring full: flag and count the loss */
+			s->samples_lost++;
+		}
+		else {
+			s->event_log[head].eip = eip;
+			s->event_log[head].mode = mode;
+			s->event_log[head].event = event;
+			head++;
+			if (head >= MAX_OPROF_EVENTS)
+				head = 0;	/* wrap around */
+			s->event_head = head;	/* publish the new head last */
+		}
+	}
+	/* passive domains: samples are delivered to the primary profiler */
+	else {
+		dest = primary_profiler;
+		s = dest->shared_info;
+		head = s->event_head;
+		tail = s->event_tail;
+
+		/* We use the following inefficient format for logging
+		   events from other domains: a special marker record
+		   (eip = ~1, mode = ~0, event = domain id) precedes the
+		   real record. This is done for each such sample. */
+
+		head = s->event_head;	/* same field as read just above */
+		if (head >= MAX_OPROF_EVENTS)
+			head = 0;
+		/* for passive domains we need at least two empty entries
+		   in the buffer: one for the marker, one for the sample */
+		if ((head == tail - 1) || 
+		    (head == tail - 2) ||
+		    (head == MAX_OPROF_EVENTS - 1 && tail <= 1) ||
+		    (head == MAX_OPROF_EVENTS - 2 && tail == 0) ) {
+			s->losing_samples = 1;
+			s->samples_lost++;
+		}
+		else {
+			s->event_log[head].eip = ~1;
+			s->event_log[head].mode = ~0;
+			s->event_log[head].event = d->domain_id;	/* NOTE(review): event is u8; ids > 255 would be truncated */
+			head++;
+			if (head >= MAX_OPROF_EVENTS)
+				head = 0;
+			s->event_log[head].eip = eip;
+			s->event_log[head].mode = mode;
+			s->event_log[head].event = event;
+			head++;
+			if (head >= MAX_OPROF_EVENTS)
+				head = 0;
+       			s->event_head = head;
+		}
+	}
+}
+
+static void pmc_event_init(struct domain *d)	/* zero d's oprofile fields in shared_info */
+{
+	shared_info_t *s = d->shared_info;
+	s->event_head = 0;	/* head == tail: ring starts out empty */
+	s->event_tail = 0;
+	s->losing_samples = 0;
+	s->samples_lost = 0;
+	s->nmi_restarts = 0;
+	s->active_samples = 0;
+	s->passive_samples = 0;
+	s->other_samples = 0;
+}
+
+extern int nmi_init(int *num_events, int *is_primary);
+extern int nmi_reserve_counters(void);
+extern int nmi_setup_events(void);
+extern int nmi_enable_virq(void);
+extern int nmi_start(void);
+extern void nmi_stop(void);
+extern void nmi_disable_virq(void);
+extern void nmi_release_counters(void);
+
+#define PRIV_OP(op)	(((op) == PMC_SET_ACTIVE) || ((op) == PMC_SET_PASSIVE) || ((op) == PMC_RESERVE_COUNTERS) \
+			|| ((op) == PMC_SETUP_EVENTS) || ((op) == PMC_START) || ((op) == PMC_STOP) \
+			|| ((op) == PMC_RELEASE_COUNTERS) || ((op) == PMC_SHUTDOWN))	/* ops checked against primary_profiler */
+
+/* pmc_op hypercall entry: dispatch on op; arg1/arg2 are op-specific (guest pointer and element
+ * count for the SET_* and SETUP ops). Returns 0 or the nmi_*() result, else -EPERM/-EINVAL/-EFAULT. */
+int do_pmc_op(int op, u64 arg1, u64 arg2)
+{
+	int ret = 0;
+
+	if (PRIV_OP(op) && current->domain != primary_profiler)
+		return -EPERM;	/* privileged ops are only accepted from the primary profiler */
+	switch (op) {
+		case PMC_INIT:
+			printk("PMC_INIT\n");
+			ret = nmi_init((int *)arg1, (int *)arg2);
+			printk("nmi_init returned %d\n", ret);
+			break;
+
+		case PMC_SET_ACTIVE:	/* arg1: guest array of domain ids, arg2: element count */
+			printk("PMC_SETACTIVE\n");
+			if (adomains != 0)
+				return -EPERM;
+			if (arg2 > MAX_OPROF_DOMAINS)	/* guest-supplied count: never copy past active_domains[] */
+				return -EINVAL;
+			if (copy_from_user((void *)&active_domains, (void *)arg1, arg2*sizeof(int)))
+				return -EFAULT;
+			if (set_adomain_ptrs(arg2) || set_active(current->domain))
+				return -EFAULT;
+			break;
+
+		case PMC_SET_PASSIVE:	/* arg1: guest array of domain ids, arg2: element count */
+			printk("PMC_SETPASSIVE\n");
+			if (pdomains != 0)
+				return -EPERM;
+			if (arg2 > MAX_OPROF_DOMAINS)	/* guest-supplied count: never copy past passive_domains[] */
+				return -EINVAL;
+			if (copy_from_user((void *)&passive_domains, (void *)arg1, arg2*sizeof(int)))
+				return -EFAULT;
+			pdomains = arg2;
+			break;
+
+		case PMC_RESERVE_COUNTERS:
+			printk("PMC_RESERVE_COUNTERS\n");
+			ret = nmi_reserve_counters();
+			break;
+
+		case PMC_SETUP_EVENTS:	/* NOTE(review): arg2 is not bounded against the size of counter_config */
+			printk("PMC_SETUP_EVENTS\n");
+			if (copy_from_user((void *)&counter_config, (void *)arg1, arg2*sizeof(struct op_counter_config)))
+				return -EFAULT;
+			ret = nmi_setup_events();
+			break;
+
+		case PMC_ENABLE_VIRQ:
+			printk("PMC_ENABLE_VIRQ\n");
+			if (!is_active(current->domain)) {
+				if (current->domain != primary_profiler)
+					return -EPERM;
+				if (set_active(current->domain))	/* do not continue if the caller could not be added */
+					return -EFAULT;
+			}
+			ret = nmi_enable_virq();
+			pmc_event_init(current->domain);
+			activated++;
+			break;
+
+		case PMC_START:
+			printk("PMC_START\n");
+			if (activated < adomains)	/* refused until activated has reached adomains */
+				return -EPERM;
+			ret = nmi_start();
+			break;
+
+		case PMC_STOP:
+			printk("PMC_STOP\n");
+			nmi_stop();
+			break;
+
+		case PMC_DISABLE_VIRQ:
+			printk("PMC_DISABLE_VIRQ\n");
+			if (!is_active(current->domain))
+				return -EPERM;
+			nmi_disable_virq();
+			activated--;
+			break;
+
+		case PMC_RELEASE_COUNTERS:
+			printk("PMC_RELEASE_COUNTERS\n");
+			nmi_release_counters();
+			break;
+
+		case PMC_SHUTDOWN:	/* forget all profiling state, including the primary profiler */
+			printk("PMC_SHUTDOWN\n");
+			free_adomain_ptrs();
+			pdomains = 0;
+			activated = 0;
+			primary_profiler = NULL;
+			break;
+
+		default:
+			ret = -EINVAL;
+	}
+	return ret;
+}
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/traps.c xen-unstable.hg-20050823/xen/arch/x86/traps.c
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/traps.c	2005-08-24 07:44:52.000000000 -0500
+++ xen-unstable.hg-20050823/xen/arch/x86/traps.c	2005-08-23 07:05:17.000000000 -0500
@@ -2,6 +2,10 @@
  * arch/x86/traps.c
  * 
  * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,6 +58,7 @@
 #include <asm/debugger.h>
 #include <asm/msr.h>
 #include <asm/x86_emulate.h>
+#include <asm/nmi.h>
 
 /*
  * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
@@ -1040,7 +1045,7 @@ static void unknown_nmi_error(unsigned c
     printk("Do you have a strange power saving mode enabled?\n");
 }
 
-asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason)
+static void default_do_nmi(struct cpu_user_regs * regs, unsigned long reason)
 {
     ++nmi_count(smp_processor_id());
 
@@ -1055,6 +1060,35 @@ asmlinkage void do_nmi(struct cpu_user_r
         unknown_nmi_error((unsigned char)(reason&0xff));
 }
 
+static int dummy_nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+        return 0;	/* 0 = not handled, so do_nmi() runs default_do_nmi() */
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+asmlinkage void do_nmi(struct cpu_user_regs * regs, unsigned long reason)
+{
+    int cpu = smp_processor_id();
+
+    /* Use the default handling unless the installed callback claims the NMI. */
+    if (!nmi_callback(regs, cpu))
+        default_do_nmi(regs, reason);
+}
+
+void set_nmi_callback(nmi_callback_t callback)
+{
+    nmi_callback = callback;	/* replaces whatever handler was installed before */
+}
+ 
+void unset_nmi_callback(void)
+{
+    nmi_callback = dummy_nmi_callback;	/* back to the no-op handler that always returns 0 */
+}
+ 
+EXPORT_SYMBOL(set_nmi_callback);
+EXPORT_SYMBOL(unset_nmi_callback);
+ 
 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
 {
     /* Prevent recursion. */
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/x86_32/entry.S xen-unstable.hg-20050823/xen/arch/x86/x86_32/entry.S
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/x86_32/entry.S	2005-08-24 07:44:52.000000000 -0500
+++ xen-unstable.hg-20050823/xen/arch/x86/x86_32/entry.S	2005-08-23 07:05:17.000000000 -0500
@@ -763,7 +763,8 @@ ENTRY(hypercall_table)
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
-        .long do_acm_op             /* 27 */
+        .long do_acm_op
+	.long do_pmc_op             /* 28 */
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/arch/x86/x86_64/entry.S xen-unstable.hg-20050823/xen/arch/x86/x86_64/entry.S
--- xen-unstable.hg-20050823-nooprofile/xen/arch/x86/x86_64/entry.S	2005-08-24 07:44:52.000000000 -0500
+++ xen-unstable.hg-20050823/xen/arch/x86/x86_64/entry.S	2005-08-23 07:05:17.000000000 -0500
@@ -593,6 +593,7 @@ ENTRY(hypercall_table)
         .quad do_set_segment_base   /* 25 */
         .quad do_mmuext_op
         .quad do_acm_op
+	.quad do_pmc_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .quad do_ni_hypercall
         .endr
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/include/asm-x86/msr.h xen-unstable.hg-20050823/xen/include/asm-x86/msr.h
--- xen-unstable.hg-20050823-nooprofile/xen/include/asm-x86/msr.h	2005-08-24 07:44:53.000000000 -0500
+++ xen-unstable.hg-20050823/xen/include/asm-x86/msr.h	2005-08-23 07:05:17.000000000 -0500
@@ -195,6 +195,89 @@
 #define MSR_P6_EVNTSEL0			0x186
 #define MSR_P6_EVNTSEL1			0x187
 
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0 		0x300
+#define MSR_P4_BPU_PERFCTR1 		0x301
+#define MSR_P4_BPU_PERFCTR2 		0x302
+#define MSR_P4_BPU_PERFCTR3 		0x303
+#define MSR_P4_MS_PERFCTR0 		0x304
+#define MSR_P4_MS_PERFCTR1 		0x305
+#define MSR_P4_MS_PERFCTR2 		0x306
+#define MSR_P4_MS_PERFCTR3 		0x307
+#define MSR_P4_FLAME_PERFCTR0 		0x308
+#define MSR_P4_FLAME_PERFCTR1 		0x309
+#define MSR_P4_FLAME_PERFCTR2 		0x30a
+#define MSR_P4_FLAME_PERFCTR3 		0x30b
+#define MSR_P4_IQ_PERFCTR0 		0x30c
+#define MSR_P4_IQ_PERFCTR1 		0x30d
+#define MSR_P4_IQ_PERFCTR2 		0x30e
+#define MSR_P4_IQ_PERFCTR3 		0x30f
+#define MSR_P4_IQ_PERFCTR4 		0x310
+#define MSR_P4_IQ_PERFCTR5 		0x311
+#define MSR_P4_BPU_CCCR0 		0x360
+#define MSR_P4_BPU_CCCR1 		0x361
+#define MSR_P4_BPU_CCCR2 		0x362
+#define MSR_P4_BPU_CCCR3 		0x363
+#define MSR_P4_MS_CCCR0 		0x364
+#define MSR_P4_MS_CCCR1 		0x365
+#define MSR_P4_MS_CCCR2 		0x366
+#define MSR_P4_MS_CCCR3 		0x367
+#define MSR_P4_FLAME_CCCR0 		0x368
+#define MSR_P4_FLAME_CCCR1 		0x369
+#define MSR_P4_FLAME_CCCR2 		0x36a
+#define MSR_P4_FLAME_CCCR3 		0x36b
+#define MSR_P4_IQ_CCCR0 		0x36c
+#define MSR_P4_IQ_CCCR1 		0x36d
+#define MSR_P4_IQ_CCCR2 		0x36e
+#define MSR_P4_IQ_CCCR3 		0x36f
+#define MSR_P4_IQ_CCCR4 		0x370
+#define MSR_P4_IQ_CCCR5 		0x371
+#define MSR_P4_ALF_ESCR0 		0x3ca
+#define MSR_P4_ALF_ESCR1 		0x3cb
+#define MSR_P4_BPU_ESCR0 		0x3b2
+#define MSR_P4_BPU_ESCR1 		0x3b3
+#define MSR_P4_BSU_ESCR0 		0x3a0
+#define MSR_P4_BSU_ESCR1 		0x3a1
+#define MSR_P4_CRU_ESCR0 		0x3b8
+#define MSR_P4_CRU_ESCR1 		0x3b9
+#define MSR_P4_CRU_ESCR2 		0x3cc
+#define MSR_P4_CRU_ESCR3 		0x3cd
+#define MSR_P4_CRU_ESCR4 		0x3e0
+#define MSR_P4_CRU_ESCR5 		0x3e1
+#define MSR_P4_DAC_ESCR0 		0x3a8
+#define MSR_P4_DAC_ESCR1 		0x3a9
+#define MSR_P4_FIRM_ESCR0 		0x3a4
+#define MSR_P4_FIRM_ESCR1 		0x3a5
+#define MSR_P4_FLAME_ESCR0 		0x3a6
+#define MSR_P4_FLAME_ESCR1 		0x3a7
+#define MSR_P4_FSB_ESCR0 		0x3a2
+#define MSR_P4_FSB_ESCR1 		0x3a3
+#define MSR_P4_IQ_ESCR0 		0x3ba
+#define MSR_P4_IQ_ESCR1 		0x3bb
+#define MSR_P4_IS_ESCR0 		0x3b4
+#define MSR_P4_IS_ESCR1 		0x3b5
+#define MSR_P4_ITLB_ESCR0 		0x3b6
+#define MSR_P4_ITLB_ESCR1 		0x3b7
+#define MSR_P4_IX_ESCR0 		0x3c8
+#define MSR_P4_IX_ESCR1 		0x3c9
+#define MSR_P4_MOB_ESCR0 		0x3aa
+#define MSR_P4_MOB_ESCR1 		0x3ab
+#define MSR_P4_MS_ESCR0 		0x3c0
+#define MSR_P4_MS_ESCR1 		0x3c1
+#define MSR_P4_PMH_ESCR0 		0x3ac
+#define MSR_P4_PMH_ESCR1 		0x3ad
+#define MSR_P4_RAT_ESCR0 		0x3bc
+#define MSR_P4_RAT_ESCR1 		0x3bd
+#define MSR_P4_SAAT_ESCR0 		0x3ae
+#define MSR_P4_SAAT_ESCR1 		0x3af
+#define MSR_P4_SSU_ESCR0 		0x3be
+#define MSR_P4_SSU_ESCR1 		0x3bf    /* guess: not defined in manual */
+#define MSR_P4_TBPU_ESCR0 		0x3c2
+#define MSR_P4_TBPU_ESCR1 		0x3c3
+#define MSR_P4_TC_ESCR0 		0x3c4
+#define MSR_P4_TC_ESCR1 		0x3c5
+#define MSR_P4_U2L_ESCR0 		0x3b0
+#define MSR_P4_U2L_ESCR1 		0x3b1
 
 /* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
 #define MSR_K7_EVNTSEL0            0xC0010000
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/include/asm-x86/nmi.h xen-unstable.hg-20050823/xen/include/asm-x86/nmi.h
--- xen-unstable.hg-20050823-nooprofile/xen/include/asm-x86/nmi.h	1969-12-31 18:00:00.000000000 -0600
+++ xen-unstable.hg-20050823/xen/include/asm-x86/nmi.h	2005-08-23 07:05:17.000000000 -0500
@@ -0,0 +1,26 @@
+/*
+ *  linux/include/asm-i386/nmi.h
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+struct cpu_user_regs;
+ 
+typedef int (*nmi_callback_t)(struct cpu_user_regs * regs, int cpu);
+ 
+/** 
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be set at a
+ * time. The handler must return 1 if it handled the NMI.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+ 
+/** 
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set.
+ */
+void unset_nmi_callback(void);
+ 
+#endif /* ASM_NMI_H */
diff -Naurp xen-unstable.hg-20050823-nooprofile/xen/include/public/xen.h xen-unstable.hg-20050823/xen/include/public/xen.h
--- xen-unstable.hg-20050823-nooprofile/xen/include/public/xen.h	2005-08-24 07:44:53.000000000 -0500
+++ xen-unstable.hg-20050823/xen/include/public/xen.h	2005-08-23 07:05:17.000000000 -0500
@@ -4,6 +4,10 @@
  * Guest OS interface to Xen.
  * 
  * Copyright (c) 2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef __XEN_PUBLIC_XEN_H__
@@ -59,6 +63,7 @@
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
 #define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_pmc_op               28
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -72,7 +77,8 @@
 #define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error.                    */
 #define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error.                       */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
-#define NR_VIRQS        7
+#define VIRQ_PMC_OVF	7  /* PMC Overflow */
+#define NR_VIRQS        8
 
 /*
  * MMU-UPDATE REQUESTS
@@ -240,6 +246,21 @@ struct mmuext_op {
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define PMC_INIT		0
+#define PMC_SET_ACTIVE		1
+#define PMC_SET_PASSIVE		2
+#define PMC_RESERVE_COUNTERS	3
+#define PMC_SETUP_EVENTS	4
+#define PMC_ENABLE_VIRQ		5
+#define PMC_START		6
+#define PMC_STOP		7
+#define PMC_DISABLE_VIRQ	8
+#define PMC_RELEASE_COUNTERS	9
+#define PMC_SHUTDOWN		10
+
 #ifndef __ASSEMBLY__
 
 typedef u16 domid_t;
@@ -292,6 +313,8 @@ typedef struct
 /* Event channel endpoints per domain. */
 #define NR_EVENT_CHANNELS 1024
 
+#define MAX_OPROF_EVENTS	32
+#define MAX_OPROF_DOMAINS	25	
 /*
  * Per-VCPU information goes here. This will be cleaned up more when Xen 
  * actually supports multi-VCPU guests.
@@ -408,6 +431,21 @@ typedef struct shared_info {
 
     arch_shared_info_t arch;
 
+    /* Oprofile structures */
+    u8 event_head;
+    u8 event_tail;
+    struct {
+	u64 eip;
+	u8 mode;
+	u8 event;
+    } event_log[MAX_OPROF_EVENTS];
+    u8 losing_samples;
+    u64 samples_lost;
+    u32 nmi_restarts;
+    u64 active_samples;
+    u64 passive_samples;
+    u64 other_samples;
+
 } shared_info_t;
 
 /*

[-- Attachment #4: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2005-08-24 21:54 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-08-22 15:54 [PATCH] xenoprofile x86_64 Andrew Theurer
  -- strict thread matches above, loose matches on Subject: below --
2005-08-22 16:43 Santos, Jose Renato G
2005-08-24 21:54 ` Andrew Theurer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.