[Xen-devel] [PATCH] xenoprofile x86_64

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] xenoprofile x86_64
From: Andrew Theurer <habanero@xxxxxxxxxx>
Date: Mon, 22 Aug 2005 10:54:48 -0500
Attached are patches for xenoprofile on x86_64. These are not "production ready", but they do work on EM64T so far. I have not added support for Opteron just yet (but will very soon). I wanted to get these out ASAP in case anyone wanted to try them. There are not too many changes from Renato's patches: mainly the use of KERNEL_MODE instead of RING_1, u64s here and there, and new x86_64-specific files. I have not tested these patches on i386 (some changes are needed there). They should apply against changeset 6315.

-Andrew

Signed-off-by: Andrew Theurer <habanero@xxxxxxxxxx>
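
For readers skimming the diff: the RING_1 -> KERNEL_MODE change called out above comes down to the mode test in the check_ctrs handlers. A minimal sketch of the idea, as it appears in the op_model_p4.c hunk below (mode 1 = guest kernel, mode 2 = Xen):

    struct vcpu *v = current;
    int mode = 0;

    /* RING_1(regs) only matches 32-bit guests, whose kernels run in
     * ring 1; an x86_64 paravirtualized guest kernel runs in ring 3,
     * so the KERNEL_MODE(v, regs) predicate is used to cover both. */
    if (KERNEL_MODE(v, regs))
        mode = 1;       /* sample hit guest kernel code */
    else if (RING_0(regs))
        mode = 2;       /* sample hit Xen itself */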

diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/Makefile ./xen/arch/x86/Makefile
--- ../xen-unstable.hg-6251/xen/arch/x86/Makefile       2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/Makefile     2005-08-18 20:28:44 -05:00
@@ -33,7 +33,10 @@ ifneq ($(crash_debug),y)
 OBJS := $(patsubst cdb%.o,,$(OBJS))
 endif
 
+OBJS += oprofile/oprofile.o
+
 default: $(TARGET)
+       make -C oprofile
 
 $(TARGET): $(TARGET)-syms boot/mkelf32
        ./boot/mkelf32 $(TARGET)-syms $(TARGET) 0x100000
@@ -60,6 +63,9 @@ asm-offsets.s: $(TARGET_SUBARCH)/asm-off
 boot/mkelf32: boot/mkelf32.c
        $(HOSTCC) $(HOSTCFLAGS) -o $@ $<
 
+oprofile/oprofile.o:
+       $(MAKE) -C oprofile
+
 clean:
        rm -f *.o *.s *~ core boot/*.o boot/*~ boot/core boot/mkelf32
        rm -f x86_32/*.o x86_32/*~ x86_32/core
@@ -68,5 +74,6 @@ clean:
        rm -f acpi/*.o acpi/*~ acpi/core
        rm -f genapic/*.o genapic/*~ genapic/core
        rm -f cpu/*.o cpu/*~ cpu/core
+       rm -f oprofile/*.o
 
 .PHONY: default clean
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/nmi.c ./xen/arch/x86/nmi.c
--- ../xen-unstable.hg-6251/xen/arch/x86/nmi.c  2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/nmi.c        2005-08-18 20:28:44 -05:00
@@ -5,6 +5,10 @@
  *
  *  Started by Ingo Molnar <mingo@xxxxxxxxxx>
  *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  *  Fixes:
  *  Mikael Pettersson  : AMD K7 support for local APIC NMI watchdog.
  *  Mikael Pettersson  : Power Management for local APIC NMI watchdog.
@@ -35,6 +39,28 @@ static unsigned int nmi_p4_cccr_val;
 static struct ac_timer nmi_timer[NR_CPUS];
 static unsigned int nmi_timer_ticks[NR_CPUS];
 
+/*
+ * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
+ * - it may be reserved by some other driver, or not
+ * - when not reserved by some other driver, it may be used for
+ *   the NMI watchdog, or not
+ *
+ * This is maintained separately from nmi_active because the NMI
+ * watchdog may also be driven from the I/O APIC timer.
+ */
+static spinlock_t lapic_nmi_owner_lock = SPIN_LOCK_UNLOCKED;
+static unsigned int lapic_nmi_owner;
+#define LAPIC_NMI_WATCHDOG      (1<<0)
+#define LAPIC_NMI_RESERVED      (1<<1)
+
+/* nmi_active:
+ * +1: the lapic NMI watchdog is active, but can be disabled
+ *  0: the lapic NMI watchdog has not been set up, and cannot
+ *     be enabled
+ * -1: the lapic NMI watchdog is disabled, but can be enabled
+ */
+int nmi_active;
+
 #define K7_EVNTSEL_ENABLE      (1 << 22)
 #define K7_EVNTSEL_INT         (1 << 20)
 #define K7_EVNTSEL_OS          (1 << 17)
@@ -66,8 +92,6 @@ static unsigned int nmi_timer_ticks[NR_C
  * max threshold. [IA32-Vol3, Section 14.9.9] 
  */
 #define MSR_P4_IQ_COUNTER0     0x30C
-#define MSR_P4_IQ_CCCR0                0x36C
-#define MSR_P4_CRU_ESCR0       0x3B8 /* ESCR no. 4 */
 #define P4_NMI_CRU_ESCR0       P4_ESCR_EVENT_SELECT(0x3F)
 #define P4_NMI_IQ_CCCR0        \
     (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
@@ -124,6 +148,70 @@ static inline void nmi_pm_init(void) { }
  * Original code written by Keith Owens.
  */
 
+static void disable_lapic_nmi_watchdog(void)
+{
+        if (nmi_active <= 0)
+                return;
+        switch (boot_cpu_data.x86_vendor) {
+        case X86_VENDOR_AMD:
+                wrmsr(MSR_K7_EVNTSEL0, 0, 0);
+                break;
+        case X86_VENDOR_INTEL:
+                switch (boot_cpu_data.x86) {
+                case 6:
+                        wrmsr(MSR_P6_EVNTSEL0, 0, 0);
+                        break;
+                case 15:
+                       if ( (smp_num_siblings <= 1) ||
+                            ( (smp_processor_id() % smp_num_siblings) == 0) )
+                       {
+                               wrmsr(MSR_P4_IQ_CCCR0, 0, 0);
+                               wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
+                       } else {
+                               wrmsr(MSR_P4_IQ_CCCR1, 0, 0);   
+                       }
+                        break;
+                }
+                break;
+        }
+        nmi_active = -1;
+        /* tell do_nmi() and others that we're not active any more */
+        nmi_watchdog = 0;
+}
+
+static void enable_lapic_nmi_watchdog(void)
+{
+        if (nmi_active < 0) {
+                nmi_watchdog = NMI_LOCAL_APIC;
+                setup_apic_nmi_watchdog();
+        }
+}
+
+int reserve_lapic_nmi(void)
+{
+        unsigned int old_owner;
+        spin_lock(&lapic_nmi_owner_lock);
+        old_owner = lapic_nmi_owner;
+        lapic_nmi_owner |= LAPIC_NMI_RESERVED;
+        spin_unlock(&lapic_nmi_owner_lock);
+        if (old_owner & LAPIC_NMI_RESERVED)
+                return -EBUSY;
+        if (old_owner & LAPIC_NMI_WATCHDOG)
+                disable_lapic_nmi_watchdog();
+        return 0;
+}
+
+void release_lapic_nmi(void)
+{
+        unsigned int new_owner;
+        spin_lock(&lapic_nmi_owner_lock);
+        new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED;
+        lapic_nmi_owner = new_owner;
+        spin_unlock(&lapic_nmi_owner_lock);
+        if (new_owner & LAPIC_NMI_WATCHDOG)
+                enable_lapic_nmi_watchdog();
+}
+
 static void __pminit clear_msr_range(unsigned int base, unsigned int n)
 {
     unsigned int i;
@@ -241,6 +329,9 @@ void __pminit setup_apic_nmi_watchdog(vo
 
     init_ac_timer(&nmi_timer[cpu], nmi_timer_fn, NULL, cpu);
 
+    lapic_nmi_owner = LAPIC_NMI_WATCHDOG;
+    nmi_active = 1;
+
     nmi_pm_init();
 }
 
@@ -337,3 +428,7 @@ void nmi_watchdog_tick(struct cpu_user_r
         wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
     }
 }
+
+EXPORT_SYMBOL(reserve_lapic_nmi);
+EXPORT_SYMBOL(release_lapic_nmi);
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile ./xen/arch/x86/oprofile/Makefile
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/Makefile      1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/Makefile    2005-08-18 20:28:44 -05:00
@@ -0,0 +1,9 @@
+
+include $(BASEDIR)/Rules.mk
+                                     
+default: $(OBJS) 
+       $(LD) $(LDFLAGS) -r -o oprofile.o $(OBJS)
+
+%.o: %.c $(HDRS) Makefile
+       $(CC) $(CFLAGS) -c $< -o $@
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c ./xen/arch/x86/oprofile/nmi_int.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/nmi_int.c     1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/nmi_int.c   2005-08-19 19:32:01 -05:00
@@ -0,0 +1,444 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/event.h>
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/init.h>
+#include <public/xen.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+#include <xen/delay.h>
+ 
+#include "op_counter.h"
+#include "op_x86_model.h"
+ 
+static struct op_x86_model_spec const * model;
+static struct op_msrs cpu_msrs[NR_CPUS];
+static unsigned long saved_lvtpc[NR_CPUS];
+
+#define VIRQ_BITMASK_SIZE      (MAX_OPROF_DOMAINS/32 + 1)
+
+extern int active_domains[MAX_OPROF_DOMAINS];
+extern unsigned int adomains;
+
+extern struct domain * primary_profiler;
+extern struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+extern unsigned long virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+extern int is_active(struct domain *d);
+extern int active_id(struct domain *d);
+extern int is_passive(struct domain *d);
+extern int is_profiled(struct domain *d);
+
+
+int nmi_profiling_started = 0;
+
+int active_virq_count = 0;
+int passive_virq_count = 0;
+int other_virq_count = 0;
+int other_id = -1;
+int xen_count = 0;
+int dom_count = 0; 
+int ovf = 0;
+
+int nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+       int xen_mode = 0;
+
+       ovf = model->check_ctrs(cpu, &cpu_msrs[cpu], regs);
+       xen_mode = RING_0(regs);
+       if (ovf) {
+               if (xen_mode)
+                       xen_count++;
+               else
+                       dom_count++;
+
+               if (is_active(current->domain)) {
+               /* This is slightly incorrect. If we do not deliver 
+                       OVF virtual interrupts in a synchronous 
+                       manner, a process switch may happen in the domain 
+                       between the point the sample was collected and 
+                       the point at which a VIRQ was delivered. However, 
+                       it is not safe to call send_guest_virq from this 
+                       NMI context, as it may lead to a deadlock since NMIs are 
+                       unmaskable. One optimization that we can do is 
+                       that if the sample occurs while domain code is 
+                       running, we know that it is safe to call 
+                       send_guest_virq, since we know no Xen code 
+                       is running at that time.
+                       However, this may distort the sample distribution,
+                       because we may lose more Xen mode samples.*/
+                       active_virq_count++;
+                       if (!xen_mode) {
+                               send_guest_virq(current, VIRQ_PMC_OVF);
+                               clear_bit(active_id(current->domain), &virq_ovf_pending[0]);
+                       } else 
+                               set_bit(active_id(current->domain), &virq_ovf_pending[0]);
+                       primary_profiler->shared_info->active_samples++;
+               }
+               else if (is_passive(current->domain)) {
+                       set_bit(active_id(primary_profiler), &virq_ovf_pending[0]);
+                       passive_virq_count++;
+                       primary_profiler->shared_info->passive_samples++;
+               }
+               else {
+                       other_virq_count++;
+                       other_id = current->domain->domain_id;
+                       primary_profiler->shared_info->other_samples++;
+               }
+       }
+       return 1;
+}
+
+static void free_msrs(void)
+{
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               xfree(cpu_msrs[i].counters);
+               cpu_msrs[i].counters = NULL;
+               xfree(cpu_msrs[i].controls);
+               cpu_msrs[i].controls = NULL;
+       }
+}
+ 
+static int allocate_msrs(void)
+{
+       int success = 1;
+       size_t controls_size = sizeof(struct op_msr) * model->num_controls;
+       size_t counters_size = sizeof(struct op_msr) * model->num_counters;
+
+       int i;
+       for (i = 0; i < NR_CPUS; ++i) {
+               //if (!cpu_online(i))
+               if (!test_bit(i, &cpu_online_map))
+                       continue;
+
+               cpu_msrs[i].counters = xmalloc_bytes(counters_size);
+               if (!cpu_msrs[i].counters) {
+                       success = 0;
+                       break;
+               }
+               cpu_msrs[i].controls = xmalloc_bytes(controls_size);
+               if (!cpu_msrs[i].controls) {
+                       success = 0;
+                       break;
+               }
+       }
+       if (!success)
+               free_msrs();
+
+       return success;
+}
+
+static void nmi_cpu_save_registers(struct op_msrs * msrs)
+{
+       unsigned int const nr_ctrs = model->num_counters;
+       unsigned int const nr_ctrls = model->num_controls; 
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
+       unsigned int i;
+
+       for (i = 0; i < nr_ctrs; ++i) {
+               rdmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
+       }
+ 
+       for (i = 0; i < nr_ctrls; ++i) {
+               rdmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
+       }
+}
+
+static void nmi_save_registers(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       model->fill_in_addresses(msrs);
+       nmi_cpu_save_registers(msrs);
+}
+
+int nmi_reserve_counters(void)
+{
+       if (!allocate_msrs())
+               return -ENOMEM;
+
+       /* We walk a thin line between law and rape here.
+        * We need to be careful to install our NMI handler
+        * without actually triggering any NMIs as this will
+        * break the core code horrifically.
+        */
+       /* Don't we need to do this on all CPUs?*/
+       if (reserve_lapic_nmi() < 0) {
+               free_msrs();
+               return -EBUSY;
+       }
+       /* We need to serialize save and setup for HT because the subset
+        * of msrs are distinct for save and setup operations
+        */
+       on_each_cpu(nmi_save_registers, NULL, 0, 1);
+       return 0;
+}
+
+static void nmi_cpu_setup(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       model->setup_ctrs(msrs);
+}
+
+int nmi_setup_events(void)
+{
+       on_each_cpu(nmi_cpu_setup, NULL, 0, 1);
+       return 0;
+}
+
+int nmi_enable_virq()
+{
+       set_nmi_callback(nmi_callback);
+       return 0;
+}
+
+static void nmi_cpu_start(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs const * msrs = &cpu_msrs[cpu];
+       saved_lvtpc[cpu] = apic_read(APIC_LVTPC);
+       apic_write(APIC_LVTPC, APIC_DM_NMI);
+       model->start(msrs);
+}
+
+int nmi_start(void)
+{
+       on_each_cpu(nmi_cpu_start, NULL, 0, 1);
+       nmi_profiling_started = 1;
+       return 0;
+}
+
+static void nmi_cpu_stop(void * dummy)
+{
+       unsigned int v;
+       int cpu = smp_processor_id();
+       struct op_msrs const * msrs = &cpu_msrs[cpu];
+       model->stop(msrs);
+
+       /* restoring APIC_LVTPC can trigger an apic error because the delivery
+        * mode and vector nr combination can be illegal. That's by design: on
+        * power on apic lvt contain a zero vector nr which are legal only for
+        * NMI delivery mode. So inhibit apic err before restoring lvtpc
+        */
+       if (!(apic_read(APIC_LVTPC) & APIC_DM_NMI)
+               || (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)) {
+               printk("nmi_stop: APIC not good 0x%x\n", apic_read(APIC_LVTPC));
+               mdelay(5000);
+       }
+       v = apic_read(APIC_LVTERR);
+       apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+       apic_write(APIC_LVTPC, saved_lvtpc[cpu]);
+       apic_write(APIC_LVTERR, v);
+}
+ 
+void nmi_stop(void)
+{
+       nmi_profiling_started = 0;
+       on_each_cpu(nmi_cpu_stop, NULL, 0, 1);
+       active_virq_count = 0;
+       passive_virq_count = 0;
+       other_virq_count = 0;
+       xen_count = 0;
+       dom_count = 0;
+}
+
+extern unsigned int read_ctr(struct op_msrs const * const msrs, int ctr);
+
+void nmi_sanity_check(struct cpu_user_regs *regs, int cpu)
+{
+       int i;
+       int masked = 0;
+
+       /* We may have missed some NMI interrupts if we were already 
+               in an NMI context at that time. If this happens, then 
+               the counters are not reset and in the case of P4, the 
+               APIC LVT disable mask is set. In both cases we end up 
+               losing samples. On P4, this condition can be detected 
+               by checking the APIC LVT mask. But in P6, we need to 
+               examine the counters for overflow. So, every timer 
+               interrupt, we check that everything is OK */
+
+       if (apic_read(APIC_LVTPC) & APIC_LVT_MASKED)
+               masked = 1;
+
+       nmi_callback(regs, cpu);
+
+       if (ovf && masked) {
+               if (is_active(current->domain))
+                       current->domain->shared_info->nmi_restarts++;
+               else if (is_passive(current->domain))
+                       primary_profiler->shared_info->nmi_restarts++;
+       }
+
+       /*if (jiffies %1000 == 0) {     
+               printk("cpu %d: sample count %d %d %d at %u\n", cpu, active_virq_count, passive_virq_count, other_virq_count, jiffies);
+               printk("other task id %d\n", other_id);
+               printk("%d in xen, %d in domain\n", xen_count, dom_count);
+               printk("counters %p %p\n", read_ctr(&cpu_msrs[cpu], 0), read_ctr(&cpu_msrs[cpu], 1));
+       }*/
+       
+
+       for (i = 0; i < adomains; i++)
+               if (test_and_clear_bit(i, &virq_ovf_pending[0])) {
+                 /* For now we do not support profiling of SMP guests */
+                  /* virq is delivered to first VCPU */  
+                 send_guest_virq(adomain_ptrs[i]->vcpu[0], VIRQ_PMC_OVF);
+               }
+}
+
+void nmi_disable_virq(void)
+{
+       unset_nmi_callback();
+} 
+
+static void nmi_restore_registers(struct op_msrs * msrs)
+{
+       unsigned int const nr_ctrs = model->num_counters;
+       unsigned int const nr_ctrls = model->num_controls; 
+       struct op_msr * counters = msrs->counters;
+       struct op_msr * controls = msrs->controls;
+       unsigned int i;
+
+       for (i = 0; i < nr_ctrls; ++i) {
+               wrmsr(controls[i].addr,
+                       controls[i].saved.low,
+                       controls[i].saved.high);
+       }
+ 
+       for (i = 0; i < nr_ctrs; ++i) {
+               wrmsr(counters[i].addr,
+                       counters[i].saved.low,
+                       counters[i].saved.high);
+       }
+}
+ 
+static void nmi_cpu_shutdown(void * dummy)
+{
+       int cpu = smp_processor_id();
+       struct op_msrs * msrs = &cpu_msrs[cpu];
+       nmi_restore_registers(msrs);
+}
+ 
+void nmi_release_counters(void)
+{
+       on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1);
+       release_lapic_nmi();
+       free_msrs();
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int __init p4_init(void)
+{
+       __u8 cpu_model = current_cpu_data.x86_model;
+       printk("cpu model: %d\n", cpu_model);
+       if (cpu_model > 4)
+               return 0;
+
+#ifndef CONFIG_SMP
+       printk("model is op_p4_spec (uniprocessor)\n");
+       model = &op_p4_spec;
+       return 1;
+#else
+       //switch (smp_num_siblings) {
+       if (cpu_has_ht) 
+       {
+         printk("model is op_p4_ht2_spec (SMP)\n");
+         model = &op_p4_ht2_spec;
+         return 1;
+       }
+       else
+       {
+         printk("model is op_p4_spec (SMP)\n");
+         model = &op_p4_spec;
+         return 1;
+       }
+#endif
+       return 0;
+}
+
+
+static int __init ppro_init(void)
+{
+       __u8 cpu_model = current_cpu_data.x86_model;
+
+       if (cpu_model > 0xd)
+               return 0;
+
+       model = &op_ppro_spec;
+       return 1;
+}
+
+int nmi_init(int *num_events, int *is_primary)
+{
+       __u8 vendor = current_cpu_data.x86_vendor;
+       __u8 family = current_cpu_data.x86;
+       int prim = 0;
+ 
+       if (!cpu_has_apic) {
+               printk("(XEN) cpu has no APIC\n");
+               return -ENODEV;
+       }
+
+       if (primary_profiler == NULL) {
+               primary_profiler = current->domain;
+               prim = 1;
+       }
+
+       if (primary_profiler != current->domain)
+               goto out;
+
+       printk("cpu vendor: %d\n", vendor);
+       printk("cpu family: %d\n", family);
+
+       switch (vendor) {
+               case X86_VENDOR_INTEL:
+                       switch (family) {
+                               /* Pentium IV */
+                               case 0xf:
+                                       if (!p4_init())
+                                               return -ENODEV;
+                                       break;
+                               /* A P6-class processor */
+                               case 6:
+                                       if (!ppro_init())
+                                               return -ENODEV;
+                                       break;
+                               default:
+                                       return -ENODEV;
+                       }
+                       break;
+               default:
+                       return -ENODEV;
+       }
+out:
+       if (copy_to_user((void *)num_events, (void *)&model->num_counters, sizeof(int)))
+               return -EFAULT;
+       if (copy_to_user((void *)is_primary, (void *)&prim, sizeof(int)))
+               return -EFAULT;
+
+       return 0;
+}
+
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h ./xen/arch/x86/oprofile/op_counter.h
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_counter.h  1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_counter.h        2005-08-18 20:28:44 -05:00
@@ -0,0 +1,33 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+ 
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c ./xen/arch/x86/oprofile/op_model_p4.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_p4.c 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_model_p4.c       2005-08-19 22:25:07 -05:00
@@ -0,0 +1,748 @@
+/**
+ * @file op_model_p4.c
+ * P4 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_EVENTS 39
+
+#define NUM_COUNTERS_NON_HT 8
+#define NUM_ESCRS_NON_HT 45
+#define NUM_CCCRS_NON_HT 18
+#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
+
+#define NUM_COUNTERS_HT2 4
+#define NUM_ESCRS_HT2 23
+#define NUM_CCCRS_HT2 9
+#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
+
+static unsigned int num_counters = NUM_COUNTERS_NON_HT;
+
+
+/* this has to be checked dynamically since the
+   hyper-threadedness of a chip is discovered at
+   kernel boot-time. */
+static inline void setup_num_counters(void)
+{
+#ifdef CONFIG_SMP
+       if (cpu_has_ht)
+               num_counters = NUM_COUNTERS_HT2;
+#endif
+}
+
+static int inline addr_increment(void)
+{
+#ifdef CONFIG_SMP
+       return cpu_has_ht ? 2 : 1;
+#else
+       return 1;
+#endif
+}
+
+
+/* tables to simulate simplified hardware view of p4 registers */
+struct p4_counter_binding {
+       int virt_counter;
+       int counter_address;
+       int cccr_address;
+};
+
+struct p4_event_binding {
+       int escr_select;  /* value to put in CCCR */
+       int event_select; /* value to put in ESCR */
+       struct {
+               int virt_counter; /* for this counter... */
+               int escr_address; /* use this ESCR       */
+       } bindings[2];
+};
+
+/* nb: these CTR_* defines are a duplicate of defines in
+   event/i386.p4*events. */
+
+
+#define CTR_BPU_0      (1 << 0)
+#define CTR_MS_0       (1 << 1)
+#define CTR_FLAME_0    (1 << 2)
+#define CTR_IQ_4       (1 << 3)
+#define CTR_BPU_2      (1 << 4)
+#define CTR_MS_2       (1 << 5)
+#define CTR_FLAME_2    (1 << 6)
+#define CTR_IQ_5       (1 << 7)
+
+static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = {
+       { CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
+       { CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
+       { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
+       { CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
+       { CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
+       { CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
+       { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
+       { CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
+};
+
+#define NUM_UNUSED_CCCRS       NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT
+
+/* All cccr we don't use. */
+static int p4_unused_cccr[NUM_UNUSED_CCCRS] = {
+       MSR_P4_BPU_CCCR1,       MSR_P4_BPU_CCCR3,
+       MSR_P4_MS_CCCR1,        MSR_P4_MS_CCCR3,
+       MSR_P4_FLAME_CCCR1,     MSR_P4_FLAME_CCCR3,
+       MSR_P4_IQ_CCCR0,        MSR_P4_IQ_CCCR1,
+       MSR_P4_IQ_CCCR2,        MSR_P4_IQ_CCCR3
+};
+
+/* p4 event codes in libop/op_event.h are indices into this table. */
+
+static struct p4_event_binding p4_events[NUM_EVENTS] = {
+       
+       { /* BRANCH_RETIRED */
+               0x05, 0x06, 
+               { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+       
+       { /* MISPRED_BRANCH_RETIRED */
+               0x04, 0x03, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+       
+       { /* TC_DELIVER_MODE */
+               0x01, 0x01,
+               { { CTR_MS_0, MSR_P4_TC_ESCR0},  
+                 { CTR_MS_2, MSR_P4_TC_ESCR1} }
+       },
+       
+       { /* BPU_FETCH_REQUEST */
+               0x00, 0x03, 
+               { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
+                 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
+       },
+
+       { /* ITLB_REFERENCE */
+               0x03, 0x18,
+               { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
+       },
+
+       { /* MEMORY_CANCEL */
+               0x05, 0x02,
+               { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
+       },
+
+       { /* MEMORY_COMPLETE */
+               0x02, 0x08,
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* LOAD_PORT_REPLAY */
+               0x02, 0x04, 
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* STORE_PORT_REPLAY */
+               0x02, 0x05,
+               { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
+       },
+
+       { /* MOB_LOAD_REPLAY */
+               0x02, 0x03,
+               { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
+       },
+
+       { /* PAGE_WALK_TYPE */
+               0x04, 0x01,
+               { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
+                 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
+       },
+
+       { /* BSQ_CACHE_REFERENCE */
+               0x07, 0x0c, 
+               { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+                 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
+       },
+
+       { /* IOQ_ALLOCATION */
+               0x06, 0x03, 
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { 0, 0 } }
+       },
+
+       { /* IOQ_ACTIVE_ENTRIES */
+               0x06, 0x1a, 
+               { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
+                 { 0, 0 } }
+       },
+
+       { /* FSB_DATA_ACTIVITY */
+               0x06, 0x17, 
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+       },
+
+       { /* BSQ_ALLOCATION */
+               0x07, 0x05, 
+               { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
+                 { 0, 0 } }
+       },
+
+       { /* BSQ_ACTIVE_ENTRIES */
+               0x07, 0x06,
+               { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},  
+                 { 0, 0 } }
+       },
+
+       { /* X87_ASSIST */
+               0x05, 0x03, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* SSE_INPUT_ASSIST */
+               0x01, 0x34,
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* PACKED_SP_UOP */
+               0x01, 0x08, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* PACKED_DP_UOP */
+               0x01, 0x0c, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* SCALAR_SP_UOP */
+               0x01, 0x0a, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* SCALAR_DP_UOP */
+               0x01, 0x0e,
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* 64BIT_MMX_UOP */
+               0x01, 0x02, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* 128BIT_MMX_UOP */
+               0x01, 0x1a, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+
+       { /* X87_FP_UOP */
+               0x01, 0x04, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* X87_SIMD_MOVES_UOP */
+               0x01, 0x2e, 
+               { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
+                 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
+       },
+  
+       { /* MACHINE_CLEAR */
+               0x05, 0x02, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* GLOBAL_POWER_EVENTS */
+               0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
+               { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
+                 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
+       },
+  
+       { /* TC_MS_XFER */
+               0x00, 0x05, 
+               { { CTR_MS_0, MSR_P4_MS_ESCR0},
+                 { CTR_MS_2, MSR_P4_MS_ESCR1} }
+       },
+
+       { /* UOP_QUEUE_WRITES */
+               0x00, 0x09,
+               { { CTR_MS_0, MSR_P4_MS_ESCR0},
+                 { CTR_MS_2, MSR_P4_MS_ESCR1} }
+       },
+
+       { /* FRONT_END_EVENT */
+               0x05, 0x08,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* EXECUTION_EVENT */
+               0x05, 0x0c,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* REPLAY_EVENT */
+               0x05, 0x09,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
+       },
+
+       { /* INSTR_RETIRED */
+               0x04, 0x02, 
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+
+       { /* UOPS_RETIRED */
+               0x04, 0x01,
+               { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
+                 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
+       },
+
+       { /* UOP_TYPE */    
+               0x02, 0x02, 
+               { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
+                 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
+       },
+
+       { /* RETIRED_MISPRED_BRANCH_TYPE */
+               0x02, 0x05, 
+               { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+                 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+       },
+
+       { /* RETIRED_BRANCH_TYPE */
+               0x02, 0x04,
+               { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
+                 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
+       }
+};
+
+
+#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
+
+#define ESCR_RESERVED_BITS 0x80000003
+#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
+#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
+#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
+#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
+#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
+#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
+#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
+#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0)
+
+#define CCCR_RESERVED_BITS 0x38030FFF
+#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
+#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
+#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
+#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
+#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
+#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
+#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
+#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0)
+#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
+#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
+
+#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0)
+#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000))
+
+
+/* this assigns a "stagger" to the current CPU, which is used throughout
+   the code in this module as an extra array offset, to select the "even"
+   or "odd" part of all the divided resources. */
+static unsigned int get_stagger(void)
+{
+#ifdef CONFIG_SMP
+       /*int cpu = smp_processor_id();
+       return (cpu != first_cpu(cpu_sibling_map[cpu]));*/
+       /* We want the two logical cpus of a physical cpu to use
+       disjoint set of counters. The following code is wrong. */
+       return 0;
+#endif 
+       return 0;
+}
+
+
+/* finally, mediate access to a real hardware counter
+   by passing a "virtual" counter number to this macro,
+   along with your stagger setting. */
+#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
+
+static unsigned long reset_value[NUM_COUNTERS_NON_HT];
+
+
+static void p4_fill_in_addresses(struct op_msrs * const msrs)
+{
+       unsigned int i; 
+       unsigned int addr, stag;
+
+       setup_num_counters();
+       stag = get_stagger();
+
+       /* the counter registers we pay attention to */
+       for (i = 0; i < num_counters; ++i) {
+               msrs->counters[i].addr = 
+                       p4_counters[VIRT_CTR(stag, i)].counter_address;
+       }
+
+       /* FIXME: bad feeling, we don't save the 10 counters we don't use. */
+
+       /* 18 CCCR registers */
+       for (i = 0, addr = MSR_P4_BPU_CCCR0 + stag;
+            addr <= MSR_P4_IQ_CCCR5; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+       
+       /* 43 ESCR registers in three or four discontiguous groups */
+       for (addr = MSR_P4_BSU_ESCR0 + stag;
+            addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+
+       /* no IQ_ESCR0/1 on some models, we save BSU_ESCR0/1 a second time
+        * to avoid special case in nmi_{save|restore}_registers() */
+       if (boot_cpu_data.x86_model >= 0x3) {
+               for (addr = MSR_P4_BSU_ESCR0 + stag;
+                    addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
+                       msrs->controls[i].addr = addr;
+               }
+       } else {
+               for (addr = MSR_P4_IQ_ESCR0 + stag;
+                    addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
+                       msrs->controls[i].addr = addr;
+               }
+       }
+
+       for (addr = MSR_P4_RAT_ESCR0 + stag;
+            addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+               msrs->controls[i].addr = addr;
+       }
+       
+       for (addr = MSR_P4_MS_ESCR0 + stag;
+            addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { 
+               msrs->controls[i].addr = addr;
+       }
+       
+       for (addr = MSR_P4_IX_ESCR0 + stag;
+            addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { 
+               msrs->controls[i].addr = addr;
+       }
+
+       /* there are 2 remaining non-contiguously located ESCRs */
+
+       if (num_counters == NUM_COUNTERS_NON_HT) {              
+               /* standard non-HT CPUs handle both remaining ESCRs*/
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+       } else if (stag == 0) {
+               /* HT CPUs give the first remainder to the even thread, as
+                  the 32nd control register */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
+
+       } else {
+               /* and two copies of the second to the odd thread,
+                  for the 22nd and 23rd control registers */
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+               msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
+       }
+}
+
+
+static void pmc_setup_one_p4_counter(unsigned int ctr)
+{
+       int i;
+       int const maxbind = 2;
+       unsigned int cccr = 0;
+       unsigned int escr = 0;
+       unsigned int high = 0;
+       unsigned int counter_bit;
+       struct p4_event_binding *ev = NULL;
+       unsigned int stag;
+
+       stag = get_stagger();
+       
+       /* convert from counter *number* to counter *bit* */
+       counter_bit = 1 << VIRT_CTR(stag, ctr);
+       
+       /* find our event binding structure. */
+       if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
+               printk(KERN_ERR 
+                      "oprofile: P4 event code 0x%lx out of range\n", 
+                      counter_config[ctr].event);
+               return;
+       }
+       
+       ev = &(p4_events[counter_config[ctr].event - 1]);
+       
+       for (i = 0; i < maxbind; i++) {
+               if (ev->bindings[i].virt_counter & counter_bit) {
+
+                       /* modify ESCR */
+                       ESCR_READ(escr, high, ev, i);
+                       ESCR_CLEAR(escr);
+                       if (stag == 0) {
+                               ESCR_SET_USR_0(escr, counter_config[ctr].user);
+                               ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
+                       } else {
+                               ESCR_SET_USR_1(escr, counter_config[ctr].user);
+                               ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
+                       }
+                       ESCR_SET_EVENT_SELECT(escr, ev->event_select);
+                       ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
+                       ESCR_WRITE(escr, high, ev, i);
+                      
+                       /* modify CCCR */
+                       CCCR_READ(cccr, high, VIRT_CTR(stag, ctr));
+                       CCCR_CLEAR(cccr);
+                       CCCR_SET_REQUIRED_BITS(cccr);
+                       CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
+                       if (stag == 0) {
+                               CCCR_SET_PMI_OVF_0(cccr);
+                       } else {
+                               CCCR_SET_PMI_OVF_1(cccr);
+                       }
+                       CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr));
+                       return;
+               }
+       }
+
+       printk(KERN_ERR 
+              "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
+              counter_config[ctr].event, stag, ctr);
+}
+
+
+static void p4_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int i;
+       unsigned int low, high;
+       unsigned int addr;
+       unsigned int stag;
+
+       stag = get_stagger();
+
+       rdmsr(MSR_IA32_MISC_ENABLE, low, high);
+       if (! MISC_PMC_ENABLED_P(low)) {
+               printk(KERN_ERR "oprofile: P4 PMC not available\n");
+               return;
+       }
+
+       /* clear the cccrs we will use */
+       for (i = 0 ; i < num_counters ; i++) {
+               rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+               CCCR_CLEAR(low);
+               CCCR_SET_REQUIRED_BITS(low);
+               wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
+       }
+
+       /* clear cccrs outside our concern */
+       for (i = stag ; i < NUM_UNUSED_CCCRS ; i += addr_increment()) {
+               rdmsr(p4_unused_cccr[i], low, high);
+               CCCR_CLEAR(low);
+               CCCR_SET_REQUIRED_BITS(low);
+               wrmsr(p4_unused_cccr[i], low, high);
+       }
+
+       /* clear all escrs (including those outside our concern) */
+       for (addr = MSR_P4_BSU_ESCR0 + stag;
+            addr <  MSR_P4_IQ_ESCR0; addr += addr_increment()) {
+               wrmsr(addr, 0, 0);
+       }
+
+       /* On older models clear also MSR_P4_IQ_ESCR0/1 */
+       if (boot_cpu_data.x86_model < 0x3) {
+               wrmsr(MSR_P4_IQ_ESCR0, 0, 0);
+               wrmsr(MSR_P4_IQ_ESCR1, 0, 0);
+       }
+
+       for (addr = MSR_P4_RAT_ESCR0 + stag;
+            addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
+               wrmsr(addr, 0, 0);
+       }
+       
+       for (addr = MSR_P4_MS_ESCR0 + stag;
+            addr <= MSR_P4_TC_ESCR1; addr += addr_increment()){ 
+               wrmsr(addr, 0, 0);
+       }
+       
+       for (addr = MSR_P4_IX_ESCR0 + stag;
+            addr <= MSR_P4_CRU_ESCR3; addr += addr_increment()){ 
+               wrmsr(addr, 0, 0);
+       }
+
+       if (num_counters == NUM_COUNTERS_NON_HT) {              
+               wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+               wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+       } else if (stag == 0) {
+               wrmsr(MSR_P4_CRU_ESCR4, 0, 0);
+       } else {
+               wrmsr(MSR_P4_CRU_ESCR5, 0, 0);
+       }               
+       
+       /* setup all counters */
+       for (i = 0 ; i < num_counters ; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+                       pmc_setup_one_p4_counter(i);
+                       CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i));
+               } else {
+                       reset_value[i] = 0;
+               }
+       }
+}
+
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+static int p4_check_ctrs(unsigned int const cpu, 
+                         struct op_msrs const * const msrs,
+                         struct cpu_user_regs * const regs)
+{
+       unsigned long ctr, low, high, stag, real;
+       int i, ovf = 0;
+       u64 eip = regs->eip;
+       int mode = 0;
+       struct vcpu *v = current;
+
+       //if (RING_1(regs))
+       if (KERNEL_MODE(v, regs))
+               mode = 1;
+       else if (RING_0(regs))
+               mode = 2;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               if (!reset_value[i]) 
+                       continue;
+
+               /* 
+                * there is some eccentricity in the hardware which
+                * requires that we perform 2 extra corrections:
+                *
+                * - check both the CCCR:OVF flag for overflow and the
+                *   counter high bit for un-flagged overflows.
+                *
+                * - write the counter back twice to ensure it gets
+                *   updated properly.
+                * 
+                * the former seems to be related to extra NMIs happening
+                * during the current NMI; the latter is reported as errata
+                * N15 in intel doc 249199-029, pentium 4 specification
+                * update, though their suggested work-around does not
+                * appear to solve the problem.
+                */
+               
+               real = VIRT_CTR(stag, i);
+
+               CCCR_READ(low, high, real);
+               CTR_READ(ctr, high, real);
+               if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) {
+                       pmc_log_event(current->domain, eip, mode, i);
+                       CTR_WRITE(reset_value[i], real);
+                       CCCR_CLEAR_OVF(low);
+                       CCCR_WRITE(low, high, real);
+                       CTR_WRITE(reset_value[i], real);
+                       ovf = 1;
+               }
+       }
+
+       /* P4 quirk: you have to re-unmask the apic vector */
+       apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+       /* See op_model_ppro.c */
+       return ovf;
+}
+
+
+static void p4_start(struct op_msrs const * const msrs)
+{
+       unsigned int low, high, stag;
+       int i;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               if (!reset_value[i])
+                       continue;
+               CCCR_READ(low, high, VIRT_CTR(stag, i));
+               CCCR_SET_ENABLE(low);
+               CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+       }
+}
+
+
+static void p4_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low, high, stag;
+       int i;
+
+       stag = get_stagger();
+
+       for (i = 0; i < num_counters; ++i) {
+               CCCR_READ(low, high, VIRT_CTR(stag, i));
+               CCCR_SET_DISABLE(low);
+               CCCR_WRITE(low, high, VIRT_CTR(stag, i));
+       }
+}
+
+
+#ifdef CONFIG_SMP
+struct op_x86_model_spec const op_p4_ht2_spec = {
+       .num_counters = NUM_COUNTERS_HT2,
+       .num_controls = NUM_CONTROLS_HT2,
+       .fill_in_addresses = &p4_fill_in_addresses,
+       .setup_ctrs = &p4_setup_ctrs,
+       .check_ctrs = &p4_check_ctrs,
+       .start = &p4_start,
+       .stop = &p4_stop
+};
+#endif
+
+struct op_x86_model_spec const op_p4_spec = {
+       .num_counters = NUM_COUNTERS_NON_HT,
+       .num_controls = NUM_CONTROLS_NON_HT,
+       .fill_in_addresses = &p4_fill_in_addresses,
+       .setup_ctrs = &p4_setup_ctrs,
+       .check_ctrs = &p4_check_ctrs,
+       .start = &p4_start,
+       .stop = &p4_stop
+};
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c ./xen/arch/x86/oprofile/op_model_ppro.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_model_ppro.c       1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_model_ppro.c     2005-08-19 20:36:40 -05:00
@@ -0,0 +1,168 @@
+/**
+ * @file op_model_ppro.h
+ * pentium pro / P6 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ * @author Philippe Elie
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <xen/types.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/apic.h>
+#include <asm/processor.h>
+#include <xen/sched.h>
+#include <asm/regs.h>
+#include <asm/current.h>
+ 
+#include "op_x86_model.h"
+#include "op_counter.h"
+
+#define NUM_COUNTERS 2
+#define NUM_CONTROLS 2
+
+#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
+#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0)
+#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
+
+#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
+#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
+#define CTRL_CLEAR(x) (x &= (1<<21))
+#define CTRL_SET_ENABLE(val) (val |= 1<<20)
+#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_UM(val, m) (val |= (m << 8))
+#define CTRL_SET_EVENT(val, e) (val |= e)
+
+static unsigned long reset_value[NUM_COUNTERS];
+ 
+static void ppro_fill_in_addresses(struct op_msrs * const msrs)
+{
+       msrs->counters[0].addr = MSR_P6_PERFCTR0;
+       msrs->counters[1].addr = MSR_P6_PERFCTR1;
+       
+       msrs->controls[0].addr = MSR_P6_EVNTSEL0;
+       msrs->controls[1].addr = MSR_P6_EVNTSEL1;
+}
+
+
+static void ppro_setup_ctrs(struct op_msrs const * const msrs)
+{
+       unsigned int low, high;
+       int i;
+
+       /* clear all counters */
+       for (i = 0 ; i < NUM_CONTROLS; ++i) {
+               CTRL_READ(low, high, msrs, i);
+               CTRL_CLEAR(low);
+               CTRL_WRITE(low, high, msrs, i);
+       }
+       
+       /* avoid a false detection of ctr overflows in NMI handler */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               CTR_WRITE(1, msrs, i);
+       }
+
+       /* enable active counters */
+       for (i = 0; i < NUM_COUNTERS; ++i) {
+               if (counter_config[i].enabled) {
+                       reset_value[i] = counter_config[i].count;
+
+                       CTR_WRITE(counter_config[i].count, msrs, i);
+
+                       CTRL_READ(low, high, msrs, i);
+                       CTRL_CLEAR(low);
+                       CTRL_SET_ENABLE(low);
+                       CTRL_SET_USR(low, counter_config[i].user);
+                       CTRL_SET_KERN(low, counter_config[i].kernel);
+                       CTRL_SET_UM(low, counter_config[i].unit_mask);
+                       CTRL_SET_EVENT(low, counter_config[i].event);
+                       CTRL_WRITE(low, high, msrs, i);
+               }
+       }
+}
+
+extern void pmc_log_event(struct domain *d, u64 eip, int mode, int event);
+extern int is_profiled(struct domain * d);
+extern struct domain * primary_profiler;
+
+static int ppro_check_ctrs(unsigned int const cpu, 
+                           struct op_msrs const * const msrs,
+                           struct cpu_user_regs * const regs)
+{
+       unsigned int low, high;
+       int i, ovf = 0;
+       u64 eip = regs->eip;
+       int mode = 0;
+
+       if (RING_1(regs)) 
+               mode = 1;
+       else if (RING_0(regs))
+               mode = 2;
+
+       for (i = 0 ; i < NUM_COUNTERS; ++i) {
+               CTR_READ(low, high, msrs, i);
+               if (CTR_OVERFLOWED(low)) {
+                       pmc_log_event(current->domain, eip, mode, i);
+                       CTR_WRITE(reset_value[i], msrs, i);
+                       ovf = 1;
+               }
+       }
+
+       /* Only the P6 based Pentium M needs to re-unmask the apic vector, but
+        * it doesn't hurt other P6 variants */
+       apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
+
+       /* We can't work out if we really handled an interrupt. We
+        * might have caught a *second* counter just after it overflowed;
+        * the interrupt for that counter then arrives,
+        * we don't find a counter that's overflowed, and we
+        * would return 0 and get dazed + confused. Instead we always
+        * assume we found an overflow. This sucks.
+        */
+       return ovf;
+}
+
+ 
+static void ppro_start(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       CTRL_READ(low, high, msrs, 0);
+       CTRL_SET_ACTIVE(low);
+       CTRL_WRITE(low, high, msrs, 0);
+}
+
+static void ppro_stop(struct op_msrs const * const msrs)
+{
+       unsigned int low,high;
+       CTRL_READ(low, high, msrs, 0);
+       CTRL_SET_INACTIVE(low);
+       CTRL_WRITE(low, high, msrs, 0);
+}
+
+unsigned int read_ctr(struct op_msrs const * const msrs, int i)
+{
+       unsigned int low, high;
+       CTR_READ(low, high, msrs, i);
+       return low;
+}
+
+struct op_x86_model_spec const op_ppro_spec = {
+       .num_counters = NUM_COUNTERS,
+       .num_controls = NUM_CONTROLS,
+       .fill_in_addresses = &ppro_fill_in_addresses,
+       .setup_ctrs = &ppro_setup_ctrs,
+       .check_ctrs = &ppro_check_ctrs,
+       .start = &ppro_start,
+       .stop = &ppro_stop
+};
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h ./xen/arch/x86/oprofile/op_x86_model.h
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/op_x86_model.h        1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/op_x86_model.h      2005-08-18 20:28:44 -05:00
@@ -0,0 +1,55 @@
+/**
+ * @file op_x86_model.h
+ * interface to x86 model-specific MSR operations
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author Graydon Hoare
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#ifndef OP_X86_MODEL_H
+#define OP_X86_MODEL_H
+
+
+struct op_saved_msr {
+       unsigned int high;
+       unsigned int low;
+};
+
+struct op_msr {
+       unsigned long addr;
+       struct op_saved_msr saved;
+};
+
+struct op_msrs {
+       struct op_msr * counters;
+       struct op_msr * controls;
+};
+
+struct pt_regs;
+
+/* The model vtable abstracts the differences between
+ * various x86 CPU model's perfctr support.
+ */
+struct op_x86_model_spec {
+       unsigned int const num_counters;
+       unsigned int const num_controls;
+       void (*fill_in_addresses)(struct op_msrs * const msrs);
+       void (*setup_ctrs)(struct op_msrs const * const msrs);
+       int (*check_ctrs)(unsigned int const cpu, 
+               struct op_msrs const * const msrs,
+               struct cpu_user_regs * const regs);
+       void (*start)(struct op_msrs const * const msrs);
+       void (*stop)(struct op_msrs const * const msrs);
+};
+
+extern struct op_x86_model_spec const op_ppro_spec;
+extern struct op_x86_model_spec const op_p4_spec;
+extern struct op_x86_model_spec const op_p4_ht2_spec;
+extern struct op_x86_model_spec const op_athlon_spec;
+
+#endif /* OP_X86_MODEL_H */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c ./xen/arch/x86/oprofile/pmc.c
--- ../xen-unstable.hg-6251/xen/arch/x86/oprofile/pmc.c 1969-12-31 18:00:00 -06:00
+++ ./xen/arch/x86/oprofile/pmc.c       2005-08-19 20:34:32 -05:00
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * written by Aravind Menon, email: xenoprof@xxxxxxxxxxxxx
+ */
+
+#include <xen/sched.h>
+#include <asm/current.h>
+
+#include "op_counter.h"
+
+int active_domains[MAX_OPROF_DOMAINS];
+int passive_domains[MAX_OPROF_DOMAINS];
+unsigned int adomains = 0;
+unsigned int pdomains = 0;
+unsigned int activated = 0;
+
+#define VIRQ_BITMASK_SIZE      (MAX_OPROF_DOMAINS/32 + 1)
+
+struct domain * primary_profiler = NULL;
+struct domain * adomain_ptrs[MAX_OPROF_DOMAINS];
+unsigned int virq_ovf_pending[VIRQ_BITMASK_SIZE];
+
+int is_active(struct domain *d) 
+{
+       int i;
+       for (i = 0; i < adomains; i++)
+               if (d->domain_id == active_domains[i])
+                       return 1;
+       return 0;
+}
+
+int active_id(struct domain *d)
+{
+       int i;
+       for (i = 0; i < adomains; i++)
+               if (d == adomain_ptrs[i])
+                       return i;
+       return -1;
+}
+
+void free_adomain_ptrs() 
+{
+       int i;
+       int num = adomains;
+
+       adomains = 0;
+       for (i = 0; i < VIRQ_BITMASK_SIZE; i++)
+               virq_ovf_pending[i] = 0;
+
+       for (i = 0; i < num; i++) {
+               put_domain(adomain_ptrs[i]);
+               adomain_ptrs[i] = NULL;
+       }
+}
+
+int set_adomain_ptrs(int num)
+{
+       int i;
+       struct domain *d;
+
+       for (i = 0; i < VIRQ_BITMASK_SIZE; i++)
+               virq_ovf_pending[i] = 0;
+
+       for (i = 0; i < num; i++) {
+               d = find_domain_by_id(active_domains[i]);
+               if (!d) {
+                       free_adomain_ptrs();
+                       return -EFAULT;
+               }
+               adomain_ptrs[i] = d;
+               adomains++;
+       }
+       return 0;
+}
+
+int set_active(struct domain *d)
+{
+       if (is_active(d))
+               return 0;
+       /* hack if we run out of space */
+       if (adomains >= MAX_OPROF_DOMAINS) {
+               adomains--;
+               put_domain(adomain_ptrs[adomains]);
+       }
+       active_domains[adomains] = d->domain_id;
+       if (get_domain(d))
+               adomain_ptrs[adomains++] = d;
+       else {
+               free_adomain_ptrs();
+               return -EFAULT;
+       }
+       return 0;
+}
+
+int is_passive(struct domain *d)
+{
+       int i;
+       for (i = 0; i < pdomains; i++)
+               if (d->domain_id == passive_domains[i])
+                       return 1;
+       return 0;
+}
+
+int is_profiled(struct domain *d)
+{
+       if (is_active(d) || is_passive(d))
+               return 1;
+       return 0;
+}
+
+void pmc_log_event(struct domain *d, u64 eip, int mode, int event) 
+{
+       shared_info_t *s = NULL;
+       struct domain *dest = d;
+       int head;
+       int tail;
+
+       if (!is_profiled(d))
+               return;
+
+       if (!is_passive(d)) {
+               s = dest->shared_info;
+               head = s->event_head;
+               tail = s->event_tail;
+               if ((head == tail - 1) || 
+                   (head == MAX_OPROF_EVENTS - 1 && tail == 0)) {
+                       s->losing_samples = 1;
+                       s->samples_lost++;
+               }
+               else {
+                       s->event_log[head].eip = eip;
+                       s->event_log[head].mode = mode;
+                       s->event_log[head].event = event;
+                       head++;
+                       if (head >= MAX_OPROF_EVENTS)
+                               head = 0;
+                       s->event_head = head;
+               }
+       }
+       /* passive domains */
+       else {
+               dest = primary_profiler;
+               s = dest->shared_info;
+               head = s->event_head;
+               tail = s->event_tail;
+
+               /* Events from other domains are logged in a less efficient
+                  format: each such sample is preceded by a special record
+                  whose event field carries the source domain id, telling
+                  the consumer that the next record belongs to another
+                  domain. */
+
+               /* for passive domains we need to have at least two
+                  entries empty in the buffer */
+               if ((head == tail - 1) || 
+                   (head == tail - 2) ||
+                   (head == MAX_OPROF_EVENTS - 1 && tail <= 1) ||
+                   (head == MAX_OPROF_EVENTS - 2 && tail == 0) ) {
+                       s->losing_samples = 1;
+                       s->samples_lost++;
+               }
+               else {
+                       s->event_log[head].eip = ~1;
+                       s->event_log[head].mode = ~0;
+                       s->event_log[head].event = d->domain_id;
+                       head++;
+                       if (head >= MAX_OPROF_EVENTS)
+                               head = 0;
+                       s->event_log[head].eip = eip;
+                       s->event_log[head].mode = mode;
+                       s->event_log[head].event = event;
+                       head++;
+                       if (head >= MAX_OPROF_EVENTS)
+                               head = 0;
+                       s->event_head = head;
+               }
+       }
+}
+
+static void pmc_event_init(struct domain *d)
+{
+       shared_info_t *s = d->shared_info;
+       s->event_head = 0;
+       s->event_tail = 0;
+       s->losing_samples = 0;
+       s->samples_lost = 0;
+       s->nmi_restarts = 0;
+       s->active_samples = 0;
+       s->passive_samples = 0;
+       s->other_samples = 0;
+}
+
+extern int nmi_init(int *num_events, int *is_primary);
+extern int nmi_reserve_counters(void);
+extern int nmi_setup_events(void);
+extern int nmi_enable_virq(void);
+extern int nmi_start(void);
+extern void nmi_stop(void);
+extern void nmi_disable_virq(void);
+extern void nmi_release_counters(void);
+
+#define PRIV_OP(op)    ((op == PMC_SET_ACTIVE) || (op == PMC_SET_PASSIVE) || (op == PMC_RESERVE_COUNTERS) \
+                       || (op == PMC_SETUP_EVENTS) || (op == PMC_START) || (op == PMC_STOP) \
+                       || (op == PMC_RELEASE_COUNTERS) || (op == PMC_SHUTDOWN))
+
+int do_pmc_op(int op, u64 arg1, u64 arg2)
+{
+       int ret = 0;
+
+       if (PRIV_OP(op) && current->domain != primary_profiler)
+               return -EPERM;
+
+       switch (op) {
+               case PMC_INIT:
+                       printk("PMC_INIT]\n");
+                       ret = nmi_init((int *)arg1, (int *)arg2);
+                       printk("nmi_init returned %d\n", ret);
+                       break;
+
+               case PMC_SET_ACTIVE:
+                       printk("PMC_SETACTIVE]\n");
+                       if (adomains != 0)
+                               return -EPERM;
+                       if (copy_from_user((void *)&active_domains,
+                               (void *)arg1, arg2*sizeof(int)))
+                               return -EFAULT;
+                       if (set_adomain_ptrs(arg2))
+                               return -EFAULT;
+                       if (set_active(current->domain))
+                               return -EFAULT;
+                       break;
+
+               case PMC_SET_PASSIVE:
+                       printk("PMC_SETPASSIVE\n");
+                       if (pdomains != 0)
+                               return -EPERM;
+                       if (copy_from_user((void *)&passive_domains,
+                               (void *)arg1, arg2*sizeof(int)))
+                               return -EFAULT;
+                       pdomains = arg2;
+                       break;
+
+               case PMC_RESERVE_COUNTERS:
+                       printk("PMC_RESERVE_COUNTERS\n");
+                       ret = nmi_reserve_counters();
+                       break;
+
+               case PMC_SETUP_EVENTS:
+                       printk("PMV_SETUP_EVENTS\n");
+                       if (copy_from_user((void *)&counter_config, 
+                               (void *)arg1, arg2*sizeof(struct 
op_counter_config)))
+                               return -EFAULT;
+                       ret = nmi_setup_events();
+                       break;
+
+               case PMC_ENABLE_VIRQ:
+                       printk("PMC_ENABLE_VIRQ\n");
+                       if (!is_active(current->domain)) {
+                               if (current->domain != primary_profiler)
+                                       return -EPERM;
+                               else
+                                       set_active(current->domain);
+                       }
+                       ret = nmi_enable_virq();
+                       pmc_event_init(current->domain);
+                       activated++;
+                       break;
+
+               case PMC_START:
+                       printk("PMC_START\n");
+                       if (activated < adomains)
+                               return -EPERM;
+                       ret = nmi_start();
+                       break;
+
+               case PMC_STOP:
+                       printk("PMC_STOP\n");
+                       nmi_stop();
+                       break;
+
+               case PMC_DISABLE_VIRQ:
+                       printk("PMC_DISBALE_VIRQ\n");
+                       if (!is_active(current->domain))
+                               return -EPERM;
+                       nmi_disable_virq();
+                       activated--;
+                       break;
+
+               case PMC_RELEASE_COUNTERS:
+                       printk("PMC_RELEASE_COUNTERS\n");
+                       nmi_release_counters();
+                       break;
+
+               case PMC_SHUTDOWN:
+                       printk("PMC_SHUTDOWN\n");
+                       free_adomain_ptrs();
+                       pdomains = 0;
+                       activated = 0;
+                       primary_profiler = NULL;
+                       break;
+
+               default:
+                       ret = -EINVAL;
+       }
+       return ret;
+}
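
To spell out the op ordering this implies for the primary profiler: PMC_INIT
first (which presumably also establishes the primary profiler in nmi_init),
then declare the domains, reserve and program the counters, enable the VIRQ,
and only then PMC_START; note PMC_START returns -EPERM until every active
domain has done PMC_ENABLE_VIRQ. A hedged sketch of that sequence from the
guest side, using the HYPERVISOR_pmc_op wrapper added later in this patch
(error handling elided):

    static int start_profiling_sketch(void)
    {
            int num_events, is_primary;
            int doms[1] = { 0 };    /* e.g. profile domain 0 only */

            HYPERVISOR_pmc_op(PMC_INIT, (u64)&num_events, (u64)&is_primary);
            HYPERVISOR_pmc_op(PMC_SET_ACTIVE, (u64)doms, 1);
            HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, 0, 0);
            HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, (u64)&counter_config, num_events);
            HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, 0, 0);
            return HYPERVISOR_pmc_op(PMC_START, 0, 0);
    }
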
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/traps.c ./xen/arch/x86/traps.c
--- ../xen-unstable.hg-6251/xen/arch/x86/traps.c        2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/traps.c      2005-08-18 20:28:44 -05:00
@@ -2,6 +2,10 @@
  * arch/x86/traps.c
  * 
  * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  * 
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -54,6 +58,7 @@
 #include <asm/debugger.h>
 #include <asm/msr.h>
 #include <asm/x86_emulate.h>
+#include <asm/nmi.h>
 
 /*
  * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
@@ -1040,7 +1045,7 @@ static void unknown_nmi_error(unsigned c
     printk("Do you have a strange power saving mode enabled?\n");
 }
 
-asmlinkage void do_nmi(struct cpu_user_regs *regs, unsigned long reason)
+static void default_do_nmi(struct cpu_user_regs * regs, unsigned long reason)
 {
     ++nmi_count(smp_processor_id());
 
@@ -1055,6 +1060,35 @@ asmlinkage void do_nmi(struct cpu_user_r
         unknown_nmi_error((unsigned char)(reason&0xff));
 }
 
+static int dummy_nmi_callback(struct cpu_user_regs * regs, int cpu)
+{
+        return 0;
+}
+
+static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+asmlinkage void do_nmi(struct cpu_user_regs * regs, unsigned long reason)
+{
+    int cpu = smp_processor_id();
+
+    if (!nmi_callback(regs, cpu)) 
+        default_do_nmi(regs, reason);
+}
+
+void set_nmi_callback(nmi_callback_t callback)
+{
+    nmi_callback = callback;
+}
+ 
+void unset_nmi_callback(void)
+{
+    nmi_callback = dummy_nmi_callback;
+}
+ 
+EXPORT_SYMBOL(set_nmi_callback);
+EXPORT_SYMBOL(unset_nmi_callback);
+ 
 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
 {
     /* Prevent recursion. */
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S ./xen/arch/x86/x86_32/entry.S
--- ../xen-unstable.hg-6251/xen/arch/x86/x86_32/entry.S 2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/x86_32/entry.S       2005-08-18 20:28:44 -05:00
@@ -763,7 +763,8 @@ ENTRY(hypercall_table)
         .long do_boot_vcpu
         .long do_ni_hypercall       /* 25 */
         .long do_mmuext_op
-        .long do_acm_op             /* 27 */
+        .long do_acm_op
+        .long do_pmc_op             /* 28 */
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .long do_ni_hypercall
         .endr
diff -Naurp ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S ./xen/arch/x86/x86_64/entry.S
--- ../xen-unstable.hg-6251/xen/arch/x86/x86_64/entry.S 2005-08-19 23:46:22 -05:00
+++ ./xen/arch/x86/x86_64/entry.S       2005-08-18 20:37:21 -05:00
@@ -593,6 +593,7 @@ ENTRY(hypercall_table)
         .quad do_set_segment_base   /* 25 */
         .quad do_mmuext_op
         .quad do_acm_op
+        .quad do_pmc_op
         .rept NR_hypercalls-((.-hypercall_table)/4)
         .quad do_ni_hypercall
         .endr
diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h ./xen/include/asm-x86/msr.h
--- ../xen-unstable.hg-6251/xen/include/asm-x86/msr.h   2005-08-19 23:46:23 -05:00
+++ ./xen/include/asm-x86/msr.h 2005-08-18 20:28:44 -05:00
@@ -195,6 +195,89 @@
 #define MSR_P6_EVNTSEL0                        0x186
 #define MSR_P6_EVNTSEL1                        0x187
 
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0            0x300
+#define MSR_P4_BPU_PERFCTR1            0x301
+#define MSR_P4_BPU_PERFCTR2            0x302
+#define MSR_P4_BPU_PERFCTR3            0x303
+#define MSR_P4_MS_PERFCTR0             0x304
+#define MSR_P4_MS_PERFCTR1             0x305
+#define MSR_P4_MS_PERFCTR2             0x306
+#define MSR_P4_MS_PERFCTR3             0x307
+#define MSR_P4_FLAME_PERFCTR0          0x308
+#define MSR_P4_FLAME_PERFCTR1          0x309
+#define MSR_P4_FLAME_PERFCTR2          0x30a
+#define MSR_P4_FLAME_PERFCTR3          0x30b
+#define MSR_P4_IQ_PERFCTR0             0x30c
+#define MSR_P4_IQ_PERFCTR1             0x30d
+#define MSR_P4_IQ_PERFCTR2             0x30e
+#define MSR_P4_IQ_PERFCTR3             0x30f
+#define MSR_P4_IQ_PERFCTR4             0x310
+#define MSR_P4_IQ_PERFCTR5             0x311
+#define MSR_P4_BPU_CCCR0               0x360
+#define MSR_P4_BPU_CCCR1               0x361
+#define MSR_P4_BPU_CCCR2               0x362
+#define MSR_P4_BPU_CCCR3               0x363
+#define MSR_P4_MS_CCCR0                0x364
+#define MSR_P4_MS_CCCR1                0x365
+#define MSR_P4_MS_CCCR2                0x366
+#define MSR_P4_MS_CCCR3                0x367
+#define MSR_P4_FLAME_CCCR0             0x368
+#define MSR_P4_FLAME_CCCR1             0x369
+#define MSR_P4_FLAME_CCCR2             0x36a
+#define MSR_P4_FLAME_CCCR3             0x36b
+#define MSR_P4_IQ_CCCR0                0x36c
+#define MSR_P4_IQ_CCCR1                0x36d
+#define MSR_P4_IQ_CCCR2                0x36e
+#define MSR_P4_IQ_CCCR3                0x36f
+#define MSR_P4_IQ_CCCR4                0x370
+#define MSR_P4_IQ_CCCR5                0x371
+#define MSR_P4_ALF_ESCR0               0x3ca
+#define MSR_P4_ALF_ESCR1               0x3cb
+#define MSR_P4_BPU_ESCR0               0x3b2
+#define MSR_P4_BPU_ESCR1               0x3b3
+#define MSR_P4_BSU_ESCR0               0x3a0
+#define MSR_P4_BSU_ESCR1               0x3a1
+#define MSR_P4_CRU_ESCR0               0x3b8
+#define MSR_P4_CRU_ESCR1               0x3b9
+#define MSR_P4_CRU_ESCR2               0x3cc
+#define MSR_P4_CRU_ESCR3               0x3cd
+#define MSR_P4_CRU_ESCR4               0x3e0
+#define MSR_P4_CRU_ESCR5               0x3e1
+#define MSR_P4_DAC_ESCR0               0x3a8
+#define MSR_P4_DAC_ESCR1               0x3a9
+#define MSR_P4_FIRM_ESCR0              0x3a4
+#define MSR_P4_FIRM_ESCR1              0x3a5
+#define MSR_P4_FLAME_ESCR0             0x3a6
+#define MSR_P4_FLAME_ESCR1             0x3a7
+#define MSR_P4_FSB_ESCR0               0x3a2
+#define MSR_P4_FSB_ESCR1               0x3a3
+#define MSR_P4_IQ_ESCR0                0x3ba
+#define MSR_P4_IQ_ESCR1                0x3bb
+#define MSR_P4_IS_ESCR0                0x3b4
+#define MSR_P4_IS_ESCR1                0x3b5
+#define MSR_P4_ITLB_ESCR0              0x3b6
+#define MSR_P4_ITLB_ESCR1              0x3b7
+#define MSR_P4_IX_ESCR0                0x3c8
+#define MSR_P4_IX_ESCR1                0x3c9
+#define MSR_P4_MOB_ESCR0               0x3aa
+#define MSR_P4_MOB_ESCR1               0x3ab
+#define MSR_P4_MS_ESCR0                0x3c0
+#define MSR_P4_MS_ESCR1                0x3c1
+#define MSR_P4_PMH_ESCR0               0x3ac
+#define MSR_P4_PMH_ESCR1               0x3ad
+#define MSR_P4_RAT_ESCR0               0x3bc
+#define MSR_P4_RAT_ESCR1               0x3bd
+#define MSR_P4_SAAT_ESCR0              0x3ae
+#define MSR_P4_SAAT_ESCR1              0x3af
+#define MSR_P4_SSU_ESCR0               0x3be
+#define MSR_P4_SSU_ESCR1               0x3bf    /* guess: not defined in manual */
+#define MSR_P4_TBPU_ESCR0              0x3c2
+#define MSR_P4_TBPU_ESCR1              0x3c3
+#define MSR_P4_TC_ESCR0                0x3c4
+#define MSR_P4_TC_ESCR1                0x3c5
+#define MSR_P4_U2L_ESCR0               0x3b0
+#define MSR_P4_U2L_ESCR1               0x3b1
 
 /* K7/K8 MSRs. Not complete. See the architecture manual for a more complete list. */
 #define MSR_K7_EVNTSEL0            0xC0010000
diff -Naurp ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h ./xen/include/asm-x86/nmi.h
--- ../xen-unstable.hg-6251/xen/include/asm-x86/nmi.h   1969-12-31 18:00:00 -06:00
+++ ./xen/include/asm-x86/nmi.h 2005-08-18 20:28:44 -05:00
@@ -0,0 +1,26 @@
+/*
+ *  linux/include/asm-i386/nmi.h
+ */
+#ifndef ASM_NMI_H
+#define ASM_NMI_H
+
+struct cpu_user_regs;
+ 
+typedef int (*nmi_callback_t)(struct cpu_user_regs * regs, int cpu);
+ 
+/** 
+ * set_nmi_callback
+ *
+ * Set a handler for an NMI. Only one handler may be
+ * set. Return 1 if the NMI was handled.
+ */
+void set_nmi_callback(nmi_callback_t callback);
+ 
+/** 
+ * unset_nmi_callback
+ *
+ * Remove the handler previously set.
+ */
+void unset_nmi_callback(void);
+ 
+#endif /* ASM_NMI_H */
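
The oprofile code is the only intended client of this hook so far; usage
amounts to the following sketch (the handler name and bodies are placeholders):

    static int profile_nmi(struct cpu_user_regs * regs, int cpu)
    {
            /* ... check the counters, log a sample, restart them ... */
            return 1;       /* non-zero: handled, default_do_nmi() is skipped */
    }

    static void profile_attach(void) { set_nmi_callback(profile_nmi); }
    static void profile_detach(void) { unset_nmi_callback(); }
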
diff -Naurp ../xen-unstable.hg-6251/xen/include/public/xen.h ./xen/include/public/xen.h
--- ../xen-unstable.hg-6251/xen/include/public/xen.h    2005-08-19 23:46:23 -05:00
+++ ./xen/include/public/xen.h  2005-08-19 20:34:10 -05:00
@@ -4,6 +4,10 @@
  * Guest OS interface to Xen.
  * 
  * Copyright (c) 2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef __XEN_PUBLIC_XEN_H__
@@ -59,6 +63,7 @@
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
 #define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_pmc_op               28
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -72,7 +77,8 @@
 #define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error.                    */
 #define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error.                       */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
-#define NR_VIRQS        7
+#define VIRQ_PMC_OVF   7  /* PMC Overflow */
+#define NR_VIRQS        8
 
 /*
  * MMU-UPDATE REQUESTS
@@ -239,6 +245,21 @@ struct mmuext_op {
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define PMC_INIT               0
+#define PMC_SET_ACTIVE         1
+#define PMC_SET_PASSIVE                2
+#define PMC_RESERVE_COUNTERS   3
+#define PMC_SETUP_EVENTS       4
+#define PMC_ENABLE_VIRQ                5
+#define PMC_START              6
+#define PMC_STOP               7
+#define PMC_DISABLE_VIRQ       8
+#define PMC_RELEASE_COUNTERS   9
+#define PMC_SHUTDOWN           10
+
 #ifndef __ASSEMBLY__
 
 typedef u16 domid_t;
@@ -291,6 +312,8 @@ typedef struct
 /* Event channel endpoints per domain. */
 #define NR_EVENT_CHANNELS 1024
 
+#define MAX_OPROF_EVENTS       32
+#define MAX_OPROF_DOMAINS      25      
 /*
  * Per-VCPU information goes here. This will be cleaned up more when Xen 
  * actually supports multi-VCPU guests.
@@ -406,6 +429,21 @@ typedef struct shared_info {
     u32 wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
 
     arch_shared_info_t arch;
+
+    /* Oprofile structures */
+    u8 event_head;
+    u8 event_tail;
+    struct {
+       u64 eip;
+       u8 mode;
+       u8 event;
+    } event_log[MAX_OPROF_EVENTS];
+    u8 losing_samples;
+    u64 samples_lost;
+    u32 nmi_restarts;
+    u64 active_samples;
+    u64 passive_samples;
+    u64 other_samples;
 
 } shared_info_t;
 

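The new shared_info fields implement a simple single-producer/single-consumer
ring: Xen (in NMI context) advances event_head as it logs samples, the guest's
VIRQ handler advances event_tail as it drains them, and the ring is treated as
full one entry early (two entries early for passive samples, which occupy a
record pair). A sketch of the consumer side, which mirrors what
pmc_ovf_interrupt() below does in two steps ('consume' is a placeholder, not a
function in this patch):

    static void drain_samples(shared_info_t * s)
    {
            int tail = s->event_tail;

            while (tail != s->event_head) {
                    consume(s->event_log[tail].eip,
                            s->event_log[tail].mode,
                            s->event_log[tail].event);
                    if (++tail >= MAX_OPROF_EVENTS)
                            tail = 0;
            }
            s->event_tail = tail;
            s->losing_samples = 0;
    }
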
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Kconfig 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Kconfig        2005-08-22 20:17:51 -05:00
@@ -200,4 +200,6 @@ source "crypto/Kconfig"
 
 source "lib/Kconfig"
 
+source "arch/xen/oprofile/Kconfig"
+
 source "arch/xen/Kconfig.debug"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/Makefile        2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/Makefile       2005-08-22 20:17:51 -05:00
@@ -32,6 +32,8 @@ ifneq ($(KBUILD_SRC),)
        $(Q)ln -fsn ../include/asm-$(XENARCH) include2/asm
 endif
 
+drivers-$(CONFIG_OPROFILE)    += arch/xen/oprofile/
+
 include/.asm-ignore: include/asm
        @rm -f include/.asm-ignore
        @mv include/asm include/.asm-ignore
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32   2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xen0_defconfig_x86_32  2005-08-22 20:17:51 -05:00
@@ -79,6 +79,12 @@ CONFIG_OBSOLETE_MODPARM=y
 CONFIG_KMOD=y
 
 #
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
 # X86 Processor Configuration
 #
 CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32 xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32   2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/configs/xenU_defconfig_x86_32  2005-08-22 20:17:51 -05:00
@@ -76,6 +76,12 @@ CONFIG_KMOD=y
 CONFIG_STOP_MACHINE=y
 
 #
+# OProfile options
+#
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=m
+
+#
 # X86 Processor Configuration
 #
 CONFIG_XENARCH="i386"
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/i386/Makefile   2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/i386/Makefile  2005-08-22 20:17:51 -05:00
@@ -84,7 +84,6 @@ core-y                                        += arch/xen/i386/kernel/ \
 drivers-$(CONFIG_MATH_EMULATION)       += arch/i386/math-emu/
 drivers-$(CONFIG_PCI)                  += arch/xen/i386/pci/
 # must be linked after kernel/
-drivers-$(CONFIG_OPROFILE)             += arch/i386/oprofile/
 drivers-$(CONFIG_PM)                   += arch/i386/power/
 
 # for clean
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/kernel/evtchn.c        2005-08-22 20:17:51 -05:00
@@ -44,11 +44,16 @@
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/evtchn.h>
 
+int virq_to_phys(int virq);
+
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 EXPORT_SYMBOL(force_evtchn_callback);
 EXPORT_SYMBOL(evtchn_do_upcall);
 EXPORT_SYMBOL(bind_evtchn_to_irq);
 EXPORT_SYMBOL(unbind_evtchn_from_irq);
+EXPORT_SYMBOL(virq_to_phys);
+EXPORT_SYMBOL(bind_virq_to_irq);
+EXPORT_SYMBOL(unbind_virq_from_irq);
 #endif
 
 /*
@@ -178,6 +183,15 @@ static int find_unbound_irq(void)
         panic("No available IRQ to bind to: increase NR_IRQS!\n");
 
     return irq;
+}
+
+int virq_to_phys(int virq)
+{
+       int cpu = smp_processor_id();
+
+       if (virq >= NR_VIRQS)
+               return -1;
+       return per_cpu(virq_to_irq, cpu)[virq];
 }
 
 int bind_virq_to_irq(int virq)
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig        1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Kconfig       2005-08-22 20:17:51 -05:00
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+       depends on EXPERIMENTAL
+
+config PROFILING
+       bool "Profiling support (EXPERIMENTAL)"
+       help
+         Say Y here to enable the extended profiling support mechanisms used
+         by profilers such as OProfile.
+         
+
+config OPROFILE
+       tristate "OProfile system profiling (EXPERIMENTAL)"
+       depends on PROFILING
+       help
+         OProfile is a profiling system capable of profiling the
+         whole system, including the kernel, kernel modules, libraries,
+         and applications.
+
+         If unsure, say N.
+
+endmenu
+
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/Makefile       1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/Makefile      2005-08-22 20:17:51 -05:00
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
+               oprof.o cpu_buffer.o buffer_sync.o \
+               event_buffer.o oprofile_files.o \
+               oprofilefs.o oprofile_stats.o  \
+               timer_int.o )
+
+oprofile-y                             := $(DRIVER_OBJS) pmc.o
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h   1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/op_counter.h  2005-08-22 20:17:51 -05:00
@@ -0,0 +1,29 @@
+/**
+ * @file op_counter.h
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon
+ */
+ 
+#ifndef OP_COUNTER_H
+#define OP_COUNTER_H
+
+#define OP_MAX_COUNTER 8
+ 
+/* Per-perfctr configuration as set via
+ * oprofilefs.
+ */
+struct op_counter_config {
+        unsigned long count;
+        unsigned long enabled;
+        unsigned long event;
+        unsigned long kernel;
+        unsigned long user;
+        unsigned long unit_mask;
+};
+
+extern struct op_counter_config counter_config[];
+
+#endif /* OP_COUNTER_H */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c  1969-12-31 18:00:00 -06:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/oprofile/pmc.c 2005-08-22 20:17:51 -05:00
@@ -0,0 +1,323 @@
+/**
+ * @file nmi_int.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ */
+
+#include <linux/init.h>
+#include <linux/notifier.h>
+#include <linux/smp.h>
+#include <linux/oprofile.h>
+#include <linux/sysdev.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <asm/nmi.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+ 
+#include "op_counter.h"
+ 
+static int pmc_start(void);
+static void pmc_stop(void);
+
+/* 0 == registered but off, 1 == registered and on */
+static int pmc_enabled = 0;
+static int num_events = 0;
+static int is_primary = 0;
+
+#ifdef CONFIG_PM
+
+static int pmc_suspend(struct sys_device *dev, u32 state)
+{
+       if (pmc_enabled == 1)
+               pmc_stop();
+       return 0;
+}
+
+
+static int pmc_resume(struct sys_device *dev)
+{
+       if (pmc_enabled == 1)
+               pmc_start();
+       return 0;
+}
+
+
+static struct sysdev_class oprofile_sysclass = {
+       set_kset_name("oprofile"),
+       .resume         = pmc_resume,
+       .suspend        = pmc_suspend,
+};
+
+
+static struct sys_device device_oprofile = {
+       .id     = 0,
+       .cls    = &oprofile_sysclass,
+};
+
+
+static int __init init_driverfs(void)
+{
+       int error;
+       if (!(error = sysdev_class_register(&oprofile_sysclass)))
+               error = sysdev_register(&device_oprofile);
+       return error;
+}
+
+
+static void __exit exit_driverfs(void)
+{
+       sysdev_unregister(&device_oprofile);
+       sysdev_class_unregister(&oprofile_sysclass);
+}
+
+#else
+#define init_driverfs() do { } while (0)
+#define exit_driverfs() do { } while (0)
+#endif /* CONFIG_PM */
+
+unsigned long long oprofile_samples = 0;
+
+static irqreturn_t pmc_ovf_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       int head, tail;
+       shared_info_t *s = HYPERVISOR_shared_info;
+
+       head = s->event_head;
+       tail = s->event_tail;
+
+       /* oprofile_add_sample will also handle samples from other domains */
+
+       if (tail > head) {
+               while (tail < MAX_OPROF_EVENTS) {
+                       oprofile_add_sample_xen(s->event_log[tail].eip, 
+                               s->event_log[tail].mode, 
+                               s->event_log[tail].event);
+                       /*printk(KERN_INFO "pmc_sample: %p, %d, %d\n", 
+                               s->event_log[tail].eip, s->event_log[tail].mode,
+                               s->event_log[tail].event);*/
+                       oprofile_samples++;
+                       tail++;
+               }
+               tail = 0;
+       }
+       while (tail < head) {
+               oprofile_add_sample_xen(s->event_log[tail].eip, 
+                       s->event_log[tail].mode, s->event_log[tail].event);
+               /*printk(KERN_INFO "pmc_sample: %p, %d, %d\n", 
+                       s->event_log[tail].eip, s->event_log[tail].mode,
+                       s->event_log[tail].event);*/
+               oprofile_samples++;
+               tail++;
+       }
+
+       s->event_tail = tail;
+       s->losing_samples = 0;
+
+       return IRQ_HANDLED;
+}
+
+extern int virq_to_phys(int virq);
+
+static int pmc_setup(void)
+{
+       int ret;
+
+       if ((ret = request_irq(bind_virq_to_irq(VIRQ_PMC_OVF), 
+               pmc_ovf_interrupt, SA_INTERRUPT, "pmc_ovf", NULL)))
+               goto release_irq;
+
+       if (is_primary) {
+               ret = HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, (u64)NULL, (u64)NULL);
+               //printk(KERN_INFO "pmc_setup: reserve_counters: ret %d\n", ret);
+
+               ret = HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, (u64)&counter_config, (u64)num_events);
+               //printk(KERN_INFO "pmc_setup: setup_events: ret %d\n", ret);
+       }
+
+       ret = HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, (u64)NULL, (u64)NULL);
+       //printk(KERN_INFO "pmc_setup: enable_virq: ret %d\n", ret);
+
+       pmc_enabled = 1;
+       return 0;
+
+release_irq:
+       free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL);
+       unbind_virq_from_irq(VIRQ_PMC_OVF);
+
+       return ret;
+}
+
+static void pmc_shutdown(void)
+{
+       int ret;
+       pmc_enabled = 0;
+
+       ret = HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, (u64)NULL, (u64)NULL);
+       //printk(KERN_INFO "pmc_shutdown: disable_virq: ret %d\n", ret);
+
+       if (is_primary) {
+               ret = HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, (u64)NULL, (u64)NULL);
+               //printk(KERN_INFO "pmc_shutdown: release_counters: ret %d\n", ret);
+       }
+
+       free_irq(virq_to_phys(VIRQ_PMC_OVF), NULL);
+       unbind_virq_from_irq(VIRQ_PMC_OVF);
+}
+
+static int pmc_start(void)
+{
+       int ret = 0;
+       if (is_primary)
+               ret = HYPERVISOR_pmc_op(PMC_START, (u64)NULL, (u64)NULL);
+       //printk(KERN_INFO "pmc_start: ret %d\n", ret);
+       return ret;
+}
+ 
+static void pmc_stop(void)
+{
+       int ret = 0;
+       if (is_primary)
+               ret = HYPERVISOR_pmc_op(PMC_STOP, (u64)NULL, (u64)NULL);
+       //printk(KERN_INFO "pmc_stop: ret %d\n", ret);
+       printk(KERN_INFO "pmc: oprofile samples %llu, active %llu, passive 
%llu, other %llu, buffering losses %llu, NMI restarted %d\n", 
+               oprofile_samples, HYPERVISOR_shared_info->active_samples, 
HYPERVISOR_shared_info->passive_samples,
+               HYPERVISOR_shared_info->other_samples, 
HYPERVISOR_shared_info->samples_lost, HYPERVISOR_shared_info->nmi_restarts);
+}
+
+static int pmc_set_active(int *active_domains, unsigned int adomains)
+{
+       int ret = 0;
+       if (is_primary) 
+               ret = HYPERVISOR_pmc_op(PMC_SET_ACTIVE, 
+                       (u64)active_domains, (u64)adomains); 
+       return ret;
+}
+
+static int pmc_set_passive(int *passive_domains, unsigned int pdomains)
+{
+       int ret = 0;
+       if (is_primary)
+               ret = HYPERVISOR_pmc_op(PMC_SET_PASSIVE,
+                       (u64)passive_domains, (u64)pdomains);
+       return ret;
+}
+
+struct op_counter_config counter_config[OP_MAX_COUNTER];
+
+static int pmc_create_files(struct super_block * sb, struct dentry * root)
+{
+       unsigned int i;
+
+       for (i = 0; i < num_events; ++i) {
+               struct dentry * dir;
+               char buf[2];
+ 
+               snprintf(buf, 2, "%d", i);
+               dir = oprofilefs_mkdir(sb, root, buf);
+               oprofilefs_create_ulong(sb, dir, "enabled", 
&counter_config[i].enabled); 
+               oprofilefs_create_ulong(sb, dir, "event", 
&counter_config[i].event); 
+               oprofilefs_create_ulong(sb, dir, "count", 
&counter_config[i].count); 
+               oprofilefs_create_ulong(sb, dir, "unit_mask", 
&counter_config[i].unit_mask); 
+               oprofilefs_create_ulong(sb, dir, "kernel", 
&counter_config[i].kernel); 
+               oprofilefs_create_ulong(sb, dir, "user", 
&counter_config[i].user); 
+       }
+
+       //printk(KERN_INFO "pmc_create_files\n");
+       return 0;
+}
+ 
+ 
+struct oprofile_operations pmc_ops = {
+       .create_files   = pmc_create_files,
+       .set_active     = pmc_set_active,
+       .set_passive    = pmc_set_passive,
+       .setup          = pmc_setup,
+       .shutdown       = pmc_shutdown,
+       .start          = pmc_start,
+       .stop           = pmc_stop
+};
+ 
+
+static void __init p4_init(void)
+{
+       __u8 cpu_model = current_cpu_data.x86_model;
+
+       if (cpu_model > 3) {
+               pmc_ops.cpu_type = "type_unknown";
+               return;
+       }
+
+       /* We always assume a non-HT system because that gives us more events */
+       pmc_ops.cpu_type = "i386/p4";
+}
+
+
+static void __init ppro_init(void)
+{
+       __u8 cpu_model = current_cpu_data.x86_model;
+
+       if (cpu_model > 0xd) {
+               pmc_ops.cpu_type = "type_unknown";
+               return;
+       }
+
+       if (cpu_model == 9) {
+               pmc_ops.cpu_type = "i386/p6_mobile";
+       } else if (cpu_model > 5) {
+               pmc_ops.cpu_type = "i386/piii";
+       } else if (cpu_model > 2) {
+               pmc_ops.cpu_type = "i386/pii";
+       } else {
+               pmc_ops.cpu_type = "i386/ppro";
+       }
+}
+
+/* in order to get driverfs right */
+static int using_pmc;
+
+int __init oprofile_arch_init(struct oprofile_operations * ops)
+{
+       printk (KERN_INFO "oprofile_arch_init");
+       int ret = HYPERVISOR_pmc_op(PMC_INIT, (u64)&num_events, 
(u64)&is_primary);
+
+       if (!ret) {
+               __u8 vendor = current_cpu_data.x86_vendor;
+               __u8 family = current_cpu_data.x86;
+ 
+               if (vendor == X86_VENDOR_INTEL) {
+                       switch (family) {
+                               /* Pentium IV */
+                               case 0xf:
+                                       p4_init();
+                                       break;
+                               /* A P6-class processor */
+                               case 6:
+                                       ppro_init();
+                                       break;
+                               default:
+                                       pmc_ops.cpu_type = "type_unknown";
+                       }
+               } else pmc_ops.cpu_type = "type_unknown";
+
+               init_driverfs();
+               using_pmc = 1;
+               *ops = pmc_ops;
+       }
+       printk (KERN_INFO "oprofile_arch_init: ret %d, events %d, is_primary 
%d\n", ret, num_events, is_primary);
+       return ret;
+}
+
+
+void __exit oprofile_arch_exit(void)
+{
+       if (using_pmc)
+               exit_driverfs();
+
+       if (is_primary)
+               HYPERVISOR_pmc_op(PMC_SHUTDOWN, (u64)NULL, (u64)NULL);
+
+}
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/arch/xen/x86_64/Makefile 2005-08-22 19:43:15 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/arch/xen/x86_64/Makefile        2005-08-22 20:17:51 -05:00
@@ -69,7 +69,6 @@ libs-y                                        += arch/x86_64/lib/
 core-y                                 += arch/xen/x86_64/kernel/ arch/xen/x86_64/mm/
 core-$(CONFIG_IA32_EMULATION)          += arch/xen/x86_64/ia32/
 drivers-$(CONFIG_PCI)                  += arch/xen/x86_64/pci/
-drivers-$(CONFIG_OPROFILE)             += arch/x86_64/oprofile/
 
 # for clean
 obj-   += kernel/ mm/ pci/
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c   2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/buffer_sync.c  2005-08-22 20:17:51 -05:00
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * This is the core of the buffer management. Each
  * CPU buffer is processed and entered into the
  * global event buffer. Such processing is necessary
@@ -265,13 +269,30 @@ static void add_cpu_switch(int i)
        last_cookie = ~0UL;
 }
 
-static void add_kernel_ctx_switch(unsigned int in_kernel)
+static void add_cpu_mode_switch(unsigned int cpu_mode)
 {
        add_event_entry(ESCAPE_CODE);
-       if (in_kernel)
-               add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
-       else
-               add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
+       switch (cpu_mode)
+       {
+       case CPU_MODE_USER:
+               add_event_entry(USER_ENTER_SWITCH_CODE);
+               break;
+       case CPU_MODE_KERNEL:
+               add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+               break;
+       case CPU_MODE_XEN:
+               add_event_entry(XEN_ENTER_SWITCH_CODE);
+               break;
+       default:
+               break;
+       }
+}
+
+static void add_dom_switch(int domain_id)
+{
+       add_event_entry(ESCAPE_CODE);
+       add_event_entry(DOMAIN_SWITCH_CODE);
+       add_event_entry(domain_id);
 }
  
 static void
@@ -337,10 +358,9 @@ static int add_us_sample(struct mm_struc
  * sample is converted into a persistent dentry/offset pair
  * for later lookup from userspace.
  */
-static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
+static int add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
 {
-       if (in_kernel) {
+       if (cpu_mode >= CPU_MODE_KERNEL) {
                add_sample_entry(s->eip, s->event);
                return 1;
        } else if (mm) {
@@ -374,6 +394,11 @@ static inline int is_code(unsigned long 
 {
        return val == ESCAPE_CODE;
 }
+
+static inline int is_dom_switch(unsigned long val)
+{
+       return val == DOMAIN_SWITCH_ESCAPE_CODE;
+}
  
 
 /* "acquire" as many cpu buffer slots as we can */
@@ -489,10 +514,11 @@ void sync_buffer(int cpu)
        struct mm_struct *mm = NULL;
        struct task_struct * new;
        unsigned long cookie = 0;
-       int in_kernel = 1;
+       int cpu_mode = 1;
        unsigned int i;
        sync_buffer_state state = sb_buffer_start;
        unsigned long available;
+       int domain_switch = 0;
 
        down(&buffer_sem);
  
@@ -506,12 +532,12 @@ void sync_buffer(int cpu)
                struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
  
                if (is_code(s->eip)) {
-                       if (s->event <= CPU_IS_KERNEL) {
+                       if (s->event <= CPU_MODE_MAX) {
                                /* kernel/userspace switch */
-                               in_kernel = s->event;
+                               cpu_mode = s->event;
                                if (state == sb_buffer_start)
                                        state = sb_sample_start;
-                               add_kernel_ctx_switch(s->event);
+                               add_cpu_mode_switch(s->event);
                        } else if (s->event == CPU_TRACE_BEGIN) {
                                state = sb_bt_start;
                                add_trace_begin();
@@ -528,11 +554,23 @@ void sync_buffer(int cpu)
                                add_user_ctx_switch(new, cookie);
                        }
                } else {
-                       if (state >= sb_bt_start &&
-                           !add_sample(mm, s, in_kernel)) {
-                               if (state == sb_bt_start) {
-                                       state = sb_bt_ignore;
-                                       atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+                       if (is_dom_switch(s->eip)) {
+                               add_dom_switch((int)(s->event));
+                               domain_switch = 1;
+                       }
+                       else {
+                               if (domain_switch) {
+                                       add_sample_entry (s->eip, s->event);
+                                       domain_switch = 0;
+                               }
+                               else {
+                                       if (state >= sb_bt_start &&
+                                           !add_sample(mm, s, cpu_mode)) {
+                                               if (state == sb_bt_start) {
+                                                       state = sb_bt_ignore;
+                                                       atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+                                               }
+                                       }
                                }
                        }
                }
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c    2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.c   2005-08-22 20:17:51 -05:00
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
  * Eventually each CPU's buffer is processed into the global
@@ -58,7 +62,7 @@ int alloc_cpu_buffers(void)
                        goto fail;
  
                b->last_task = NULL;
-               b->last_is_kernel = -1;
+               b->last_cpu_mode = -1;
                b->tracing = 0;
                b->buffer_size = buffer_size;
                b->tail_pos = 0;
@@ -117,7 +121,7 @@ void cpu_buffer_reset(struct oprofile_cp
         * collected will populate the buffer with proper
         * values to initialize the buffer
         */
-       cpu_buf->last_is_kernel = -1;
+       cpu_buf->last_cpu_mode = -1;
        cpu_buf->last_task = NULL;
 }
 
@@ -180,7 +184,7 @@ add_code(struct oprofile_cpu_buffer * bu
  * events whenever is_kernel changes
  */
 static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
-                     int is_kernel, unsigned long event)
+                     int cpu_mode, unsigned long event)
 {
        struct task_struct * task;
 
@@ -191,24 +195,39 @@ static int log_sample(struct oprofile_cp
                return 0;
        }
 
-       is_kernel = !!is_kernel;
+       /* Ensure a valid cpu mode */
+       if (cpu_mode > CPU_MODE_XEN)
+               return 0;
 
        task = current;
 
-       /* notice a switch from user->kernel or vice versa */
-       if (cpu_buf->last_is_kernel != is_kernel) {
-               cpu_buf->last_is_kernel = is_kernel;
-               add_code(cpu_buf, is_kernel);
-       }
 
-       /* notice a task switch */
-       if (cpu_buf->last_task != task) {
-               cpu_buf->last_task = task;
-               add_code(cpu_buf, (unsigned long)task);
+       /* We treat samples from other domains in a special manner:
+          each sample is preceded by a record with eip equal to ~1UL.
+          This record is non-sticky, i.e. it holds only for the following
+          sample. The event field of this record stores the domain id. */
+       if (pc == DOMAIN_SWITCH_ESCAPE_CODE) {
+               add_sample(cpu_buf, pc, event);
+               return 1;
+       } else {
+               /* notice a switch from user->kernel or vice versa */
+               if (cpu_buf->last_cpu_mode != cpu_mode) {
+                       cpu_buf->last_cpu_mode = cpu_mode;
+                       add_code(cpu_buf, cpu_mode);
+               }
+
+               /* notice a task switch */
+               if (cpu_buf->last_task != task) {
+                       cpu_buf->last_task = task;
+                       add_code(cpu_buf, (unsigned long)task);
+               }
+
+               /* Note: at this point, we lose the cpu_mode of a sample
+                  if it is from another domain */
+
+               add_sample(cpu_buf, pc, event);
+               return 1;
        }
- 
-       add_sample(cpu_buf, pc, event);
-       return 1;
 }
 
 static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf)
@@ -229,6 +248,14 @@ static void oprofile_end_trace(struct op
        cpu_buf->tracing = 0;
 }
 
+void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode, 
+       unsigned long event)
+{
+       struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
+       log_sample(cpu_buf, eip, cpu_mode, event);
+}
 
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
 {
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h    2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/cpu_buffer.h   2005-08-22 20:17:51 -05:00
@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
        volatile unsigned long tail_pos;
        unsigned long buffer_size;
        struct task_struct * last_task;
-       int last_is_kernel;
+       int last_cpu_mode;
        int tracing;
        struct op_sample * buffer;
        unsigned long sample_received;
@@ -51,7 +51,14 @@ extern struct oprofile_cpu_buffer cpu_bu
 void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
 
 /* transient events for the CPU buffer -> event buffer */
-#define CPU_IS_KERNEL 1
-#define CPU_TRACE_BEGIN 2
+#define CPU_MODE_USER    0
+#define CPU_MODE_KERNEL  1
+#define CPU_MODE_XEN     2
+#define CPU_MODE_MAX     2
+#define CPU_TRACE_BEGIN  3
 
+/* special escape code indicating that the next sample in the CPU
+   buffer is from another Xen domain */
+#define DOMAIN_SWITCH_ESCAPE_CODE ~1UL
+ 
 #endif /* OPROFILE_CPU_BUFFER_H */
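
With these constants, a sample from another domain travels through the CPU
buffer as a record pair: first an entry whose eip is DOMAIN_SWITCH_ESCAPE_CODE
and whose event field holds the source domain id, then the sample itself. The
domain_switch flag in sync_buffer() above implements exactly this decode; as a
standalone sketch (record_foreign/record_local are placeholders):

    static void decode_one(struct op_sample * s, int * pending_domain)
    {
            if (s->eip == DOMAIN_SWITCH_ESCAPE_CODE) {
                    *pending_domain = (int)s->event;  /* next record is foreign */
            } else if (*pending_domain >= 0) {
                    record_foreign(*pending_domain, s->eip, s->event);
                    *pending_domain = -1;             /* the escape is non-sticky */
            } else {
                    record_local(s->eip, s->event);
            }
    }
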
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c  2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.c 2005-08-22 20:17:51 -05:00
@@ -56,6 +56,7 @@ void add_event_entry(unsigned long value
 /* Wake up the waiting process if any. This happens
  * on "echo 0 >/dev/oprofile/enable" so the daemon
  * processes the data remaining in the event buffer.
+ * It is also called on "echo 1 > /dev/oprofile/dump".
  */
 void wake_up_buffer_waiter(void)
 {
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h  2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/event_buffer.h 2005-08-22 20:17:51 -05:00
@@ -5,6 +5,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef EVENT_BUFFER_H
@@ -29,11 +33,13 @@ void wake_up_buffer_waiter(void);
 #define CPU_SWITCH_CODE                2
 #define COOKIE_SWITCH_CODE             3
 #define KERNEL_ENTER_SWITCH_CODE       4
-#define KERNEL_EXIT_SWITCH_CODE                5
+#define USER_ENTER_SWITCH_CODE         5
 #define MODULE_LOADED_CODE             6
 #define CTX_TGID_CODE                  7
 #define TRACE_BEGIN_CODE               8
 #define TRACE_END_CODE                 9
+#define XEN_ENTER_SWITCH_CODE          10
+#define DOMAIN_SWITCH_CODE             11
  
 /* add data to the event buffer */
 void add_event_entry(unsigned long data);
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprof.c 2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprof.c        2005-08-22 20:17:51 -05:00
@@ -5,6 +5,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #include <linux/kernel.h>
@@ -32,6 +36,25 @@ static DECLARE_MUTEX(start_sem);
    1 - use the timer int mechanism regardless
  */
 static int timer = 0;
+
+extern unsigned int adomains, pdomains;
+extern int active_domains[MAX_OPROF_DOMAINS], passive_domains[MAX_OPROF_DOMAINS];
+
+int oprofile_set_active(void)
+{
+       if (oprofile_ops.set_active)
+               return oprofile_ops.set_active(active_domains, adomains);
+
+       return -EINVAL;
+}
+
+int oprofile_set_passive(void)
+{
+       if (oprofile_ops.set_passive)
+               return oprofile_ops.set_passive(passive_domains, pdomains);
+
+       return -EINVAL;
+}
 
 int oprofile_setup(void)
 {
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c        2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/drivers/oprofile/oprofile_files.c       2005-08-22 20:17:51 -05:00
@@ -5,10 +5,16 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.      
  */
 
 #include <linux/fs.h>
 #include <linux/oprofile.h>
+#include <linux/pagemap.h>
+#include <linux/ctype.h>
 
 #include "event_buffer.h"
 #include "oprofile_stats.h"
@@ -117,11 +123,140 @@ static ssize_t dump_write(struct file * 
 static struct file_operations dump_fops = {
        .write          = dump_write,
 };
- 
+
+#define TMPBUFSIZE 50
+
+unsigned int adomains = 0;
+int active_domains[MAX_OPROF_DOMAINS];
+
+extern int oprofile_set_active(void);
+
+static ssize_t adomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+       char tmpbuf[TMPBUFSIZE];
+       char *startp = tmpbuf;
+       char *endp = tmpbuf;
+       int i;
+       unsigned long val;
+       
+       if (*offset)
+               return -EINVAL; 
+       if (!count)
+               return 0;
+       if (count > TMPBUFSIZE - 1)
+               return -EINVAL;
+
+       memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+       if (copy_from_user(tmpbuf, buf, count))
+               return -EFAULT;
+       
+       for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+               active_domains[i] = -1;
+       adomains = 0;
+
+       while (1) {
+               val = simple_strtol(startp, &endp, 0);
+               if (endp == startp)
+                       break;
+               while (ispunct(*endp))
+                       endp++;
+               active_domains[adomains++] = val;
+               if (adomains >= MAX_OPROF_DOMAINS)
+                       break;
+               startp = endp;
+       }
+       if (oprofile_set_active())
+               return -EINVAL; 
+       return count;
+}
+
+static ssize_t adomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+       char tmpbuf[TMPBUFSIZE];
+       size_t len = 0;
+       int i;
+       /* This is all screwed up if we run out of space */
+       for (i = 0; i < adomains; i++) 
+               len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", 
(unsigned int)active_domains[i]);
+       len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "\n");
+       return simple_read_from_buffer((void __user *)buf, count, offset, 
tmpbuf, len);
+}
+
+
+static struct file_operations active_domain_ops = {
+       .read           = adomain_read,
+       .write          = adomain_write,
+};
+
+unsigned int pdomains = 0;
+int passive_domains[MAX_OPROF_DOMAINS];
+
+extern int oprofile_set_passive(void);
+
+static ssize_t pdomain_write(struct file *file, char const __user *buf, size_t count, loff_t * offset)
+{
+       char tmpbuf[TMPBUFSIZE];
+       char *startp = tmpbuf;
+       char *endp = tmpbuf;
+       int i;
+       unsigned long val;
+       
+       if (*offset)
+               return -EINVAL; 
+       if (!count)
+               return 0;
+       if (count > TMPBUFSIZE - 1)
+               return -EINVAL;
+
+       memset(tmpbuf, 0x0, TMPBUFSIZE);
+
+       if (copy_from_user(tmpbuf, buf, count))
+               return -EFAULT;
+       
+       for (i = 0; i < MAX_OPROF_DOMAINS; i++)
+               passive_domains[i] = -1;
+       pdomains = 0;
+
+       while (1) {
+               val = simple_strtol(startp, &endp, 0);
+               if (endp == startp)
+                       break;
+               while (ispunct(*endp))
+                       endp++;
+               passive_domains[pdomains++] = val;
+               if (pdomains >= MAX_OPROF_DOMAINS)
+                       break;
+               startp = endp;
+       }
+       if (oprofile_set_passive())
+               return -EINVAL; 
+       return count;
+}
+
+static ssize_t pdomain_read(struct file *file, char __user * buf, size_t count, loff_t * offset)
+{
+       char tmpbuf[TMPBUFSIZE];
+       size_t len = 0;
+       int i;
+       /* This is all screwed up if we run out of space */
+       for (i = 0; i < pdomains; i++) 
+               len += snprintf(tmpbuf + len, TMPBUFSIZE - len, "%u ", 
(unsigned int)passive_domains[i]);
+       len += snprintf (tmpbuf + len, TMPBUFSIZE - len, "\n");
+       return simple_read_from_buffer((void __user *)buf, count, offset, 
tmpbuf, len);
+}
+
+static struct file_operations passive_domain_ops = {
+       .read           = pdomain_read,
+       .write          = pdomain_write,
+};
+
 void oprofile_create_files(struct super_block * sb, struct dentry * root)
 {
        oprofilefs_create_file(sb, root, "enable", &enable_fops);
        oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
+       oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+       oprofilefs_create_file(sb, root, "passive_domains", 
&passive_domain_ops);
        oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
        oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
        oprofilefs_create_ulong(sb, root, "buffer_watershed", 
&fs_buffer_watershed);
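
For anyone wanting to poke at the new files: here is a rough, untested
sketch of how active_domains is meant to be driven from dom0 userspace.
It assumes oprofilefs is mounted at /dev/oprofile; the list format is
whatever adomain_write() above accepts, i.e. numbers separated by
punctuation (the helper name is made up):

/* Hypothetical dom0 helper: mark domains 0 and 1 active for profiling. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_active_domains(const char *list)    /* e.g. "0,1" */
{
        int fd = open("/dev/oprofile/active_domains", O_WRONLY);
        if (fd < 0)
                return -1;
        /* adomain_write() parses with simple_strtol(), skipping punctuation */
        if (write(fd, list, strlen(list)) != (ssize_t)strlen(list)) {
                close(fd);
                return -1;
        }
        return close(fd);
}
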
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h    2005-08-22 19:43:16 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-i386/hypercall.h   2005-08-22 20:20:01 -05:00
@@ -576,4 +576,21 @@ HYPERVISOR_vcpu_pickle(
     return ret;
 }
 
+
+static inline int
+HYPERVISOR_pmc_op(
+       int op, unsigned int arg1, unsigned int arg2)
+{
+       int ret;
+       unsigned long ign1, ign2, ign3;
+
+       __asm__ __volatile__ (
+              TRAP_INSTR
+              : "=a"(ret), "=b"(ign1), "=c"(ign2), "=d"(ign3)
+              : "0"(__HYPERVISOR_pmc_op), "1"(op), "2"(arg1), "3"(arg2)
+              : "memory" );
+
+       return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h    2005-08-22 19:43:16 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/asm-x86_64/hypercall.h   2005-08-22 20:17:51 -05:00
@@ -519,4 +519,19 @@ HYPERVISOR_vcpu_pickle(
     return ret;
 }
 
+static inline int
+HYPERVISOR_pmc_op(
+       int op, u64 arg1, u64 arg2)
+{
+       int ret;
+
+       __asm__ __volatile__ (
+               TRAP_INSTR
+               : "=a"(ret)
+               : "0"(__HYPERVISOR_pmc_op), "D"(op), "S"(arg1), "d"(arg2)
+               : __syscall_clobber );
+
+       return ret;
+}
+
 #endif /* __HYPERCALL_H__ */
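
Both wrappers are invoked the same way from C. Purely as illustration
(the meaning of the two arguments is command-specific and my zeroes here
are a guess -- see the hypervisor-side handler earlier in the patch),
the driver lifecycle implied by the PMC_* commands in the xen.h hunk
below is roughly:

/* Sketch only: argument encodings are guesses, see the pmc_op handler. */
if (HYPERVISOR_pmc_op(PMC_INIT, 0, 0) ||
    HYPERVISOR_pmc_op(PMC_RESERVE_COUNTERS, 0, 0) ||
    HYPERVISOR_pmc_op(PMC_SETUP_EVENTS, 0, 0) ||
    HYPERVISOR_pmc_op(PMC_ENABLE_VIRQ, 0, 0) ||
    HYPERVISOR_pmc_op(PMC_START, 0, 0))
        return -EIO;
/* ... samples arrive via VIRQ_PMC_OVF ... */
HYPERVISOR_pmc_op(PMC_STOP, 0, 0);
HYPERVISOR_pmc_op(PMC_DISABLE_VIRQ, 0, 0);
HYPERVISOR_pmc_op(PMC_RELEASE_COUNTERS, 0, 0);
HYPERVISOR_pmc_op(PMC_SHUTDOWN, 0, 0);
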
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h        2005-08-22 19:43:14 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/asm-xen/xen-public/xen.h       2005-08-22 20:17:51 -05:00
@@ -4,6 +4,10 @@
  * Guest OS interface to Xen.
  * 
  * Copyright (c) 2004, K A Fraser
+ *
+ * Modified by Aravind Menon for supporting oprofile
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef __XEN_PUBLIC_XEN_H__
@@ -59,6 +63,7 @@
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
 #define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_pmc_op               28
 
 /* 
  * VIRTUAL INTERRUPTS
@@ -72,7 +77,8 @@
 #define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error.                    */
 #define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error.                       */
 #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
-#define NR_VIRQS        7
+#define VIRQ_PMC_OVF    7  /* PMC overflow.                               */
+#define NR_VIRQS        8
 
 /*
  * MMU-UPDATE REQUESTS
@@ -240,6 +246,21 @@ struct mmuext_op {
 #define VMASST_TYPE_writable_pagetables  2
 #define MAX_VMASST_TYPE 2
 
+/*
+ * Commands to HYPERVISOR_pmc_op().
+ */
+#define PMC_INIT                0
+#define PMC_SET_ACTIVE          1
+#define PMC_SET_PASSIVE         2
+#define PMC_RESERVE_COUNTERS    3
+#define PMC_SETUP_EVENTS        4
+#define PMC_ENABLE_VIRQ         5
+#define PMC_START               6
+#define PMC_STOP                7
+#define PMC_DISABLE_VIRQ        8
+#define PMC_RELEASE_COUNTERS    9
+#define PMC_SHUTDOWN            10
+
 #ifndef __ASSEMBLY__
 
 typedef u16 domid_t;
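
VIRQ_PMC_OVF (above) is how Xen tells the domain that a counter
overflowed and samples are waiting in shared_info. A sketch of how the
driver would presumably hook it, assuming the bind_virq_to_irq() /
request_irq() pattern used elsewhere in this tree (the handler and
function names are hypothetical):

/* Hypothetical: bind VIRQ_PMC_OVF the way other virqs are bound. */
static irqreturn_t pmc_ovf_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
        /* drain the shared_info event_log here (see the hunk below) */
        return IRQ_HANDLED;
}

static int bind_pmc_virq(void)
{
        return request_irq(bind_virq_to_irq(VIRQ_PMC_OVF),
                           pmc_ovf_interrupt, SA_INTERRUPT,
                           "pmc_ovf", NULL);
}
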
@@ -292,6 +313,8 @@ typedef struct
 /* Event channel endpoints per domain. */
 #define NR_EVENT_CHANNELS 1024
 
+#define MAX_OPROF_EVENTS       32
+#define MAX_OPROF_DOMAINS      25
 /*
  * Per-VCPU information goes here. This will be cleaned up more when Xen 
  * actually supports multi-VCPU guests.
@@ -407,6 +430,21 @@ typedef struct shared_info {
     u32 wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
 
     arch_shared_info_t arch;
+
+    /* Oprofile structures */
+    u8 event_head;
+    u8 event_tail;
+    struct {
+       u64 eip;
+       u8 mode;
+       u8 event;
+    } event_log[MAX_OPROF_EVENTS];
+    u8 losing_samples;
+    u64 samples_lost;
+    u32 nmi_restarts;
+    u64 active_samples;
+    u64 passive_samples;
+    u64 other_samples;
 
 } shared_info_t;
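
The event_log just added above is a simple ring: Xen produces at
event_head, the guest consumes at event_tail, and losing_samples /
samples_lost account for overflow. A hedged sketch of the consumer side
(field names are from the structure above; everything else, including
the function name and the absence of barriers, is illustrative only):

/* Illustrative consumer for the shared_info sample ring. */
static void drain_event_log(shared_info_t *s)
{
        while (s->event_tail != s->event_head) {
                u8 t = s->event_tail;
                oprofile_add_sample_xen(s->event_log[t].eip,
                                        s->event_log[t].mode,
                                        s->event_log[t].event);
                s->event_tail = (t + 1) % MAX_OPROF_EVENTS;
        }
}
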
 
diff -Naurp xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h
--- xen-unstable.hg-20050822/linux-2.6.12-xen0/include/linux/oprofile.h        2005-06-17 14:48:29 -05:00
+++ xen-unstable.hg-20050822-oprofile/linux-2.6.12-xen0/include/linux/oprofile.h       2005-08-22 20:17:51 -05:00
@@ -8,6 +8,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@xxxxxxxxxxxxxxxxx>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #ifndef OPROFILE_H
@@ -27,6 +31,10 @@ struct oprofile_operations {
        /* create any necessary configuration files in the oprofile fs.
         * Optional. */
        int (*create_files)(struct super_block * sb, struct dentry * root);
+       /* setup active domains with Xen */
+       int (*set_active)(int *active_domains, unsigned int adomains);
+       /* setup passive domains with Xen */
+       int (*set_passive)(int *passive_domains, unsigned int pdomains);
        /* Do any necessary interrupt setup. Optional. */
        int (*setup)(void);
        /* Do any necessary interrupt shutdown. Optional. */
@@ -60,6 +68,15 @@ void oprofile_arch_exit(void);
  * smp_processor_id() as cpu.
  */
 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event);
+
+/**
+ * Alternative function to add a sample for Xen.
+ * It would be better to combine both functions into one, but that would
+ * require passing the cpu_mode parameter (formerly is_kernel) back to
+ * oprofile_add_sample() (Xen is the best place to determine cpu_mode).
+ */
+extern void oprofile_add_sample_xen(unsigned long eip, unsigned int cpu_mode, 
+       unsigned long event);
 
 /* Use this instead when the PC value is not from the regs. Doesn't
  * backtrace. */
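
To tie the pieces together, the intended control flow as I read it is:
a write to active_domains -> adomain_write() -> oprofile_set_active() ->
oprofile_ops.set_active() -> HYPERVISOR_pmc_op(PMC_SET_ACTIVE, ...), and
likewise for the passive path. A minimal sketch of the Xen backend's
callback; the name and the per-domain argument encoding are my guesses:

/* Hypothetical backend for the new set_active hook. */
static int xenoprof_set_active(int *domains, unsigned int n)
{
        unsigned int i;
        int ret;

        for (i = 0; i < n; i++) {
                ret = HYPERVISOR_pmc_op(PMC_SET_ACTIVE, domains[i], i);
                if (ret)
                        return ret;
        }
        return 0;
}
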

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel