WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] make all performance counter per-cpu

To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] make all performance counter per-cpu
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Tue, 27 Mar 2007 16:12:04 +0100
Delivery-date: Tue, 27 Mar 2007 08:10:35 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
... avoiding the need to update them with atomic (locked) ops.

Conversion here isn't complete in the sense that many places still use
the old per-CPU accessors (which are now redundant). Since the patch is
already rather big, I'd prefer replacing those in a subsequent patch.

While doing this, I also converted x86's multicall macros to no longer
require inclusion of asm-offsets.h in the respective C file (on IA64 the
use of asm-offsets.h in C sources seems more widespread, hence there I
instead used IA64_ prefixes for the otherwise conflicting performance
counter indices).

On x86, a few counter increments get moved a little, to avoid duplicate
counting of preempted hypercalls.

Also, a few counters are being added.

IA64 changes are only compile-tested, so somebody doing active IA64 work
may want to have a close look at those changes.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2007-03-19/xen/arch/ia64/asm-offsets.c
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/asm-offsets.c 2007-02-12 14:00:54.000000000 
+0100
+++ 2007-03-19/xen/arch/ia64/asm-offsets.c      2007-03-27 16:35:08.000000000 
+0200
@@ -223,10 +223,11 @@ void foo(void)
 
 #ifdef PERF_COUNTERS
        BLANK();
-       DEFINE(RECOVER_TO_PAGE_FAULT_PERFC_OFS, offsetof (struct perfcounter, 
recover_to_page_fault));
-       DEFINE(RECOVER_TO_BREAK_FAULT_PERFC_OFS, offsetof (struct perfcounter, 
recover_to_break_fault));
-       DEFINE(FAST_HYPERPRIVOP_PERFC_OFS, offsetof (struct perfcounter, 
fast_hyperprivop));
-       DEFINE(FAST_REFLECT_PERFC_OFS, offsetof (struct perfcounter, 
fast_reflect));
+       DEFINE(IA64_PERFC_recover_to_page_fault, PERFC_recover_to_page_fault);
+       DEFINE(IA64_PERFC_recover_to_break_fault, PERFC_recover_to_break_fault);
+       DEFINE(IA64_PERFC_fast_vhpt_translate, PERFC_fast_vhpt_translate);
+       DEFINE(IA64_PERFC_fast_hyperprivop, PERFC_fast_hyperprivop);
+       DEFINE(IA64_PERFC_fast_reflect, PERFC_fast_reflect);
 #endif
 
        BLANK();
Index: 2007-03-19/xen/arch/ia64/xen/hyperprivop.S
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/xen/hyperprivop.S     2007-02-12 
14:00:54.000000000 +0100
+++ 2007-03-19/xen/arch/ia64/xen/hyperprivop.S  2007-03-27 16:35:51.000000000 
+0200
@@ -26,8 +26,7 @@
 # define FAST_HYPERPRIVOPS
 # ifdef PERF_COUNTERS
 #  define FAST_HYPERPRIVOP_CNT
-#  define FAST_HYPERPRIVOP_PERFC(N) \
-       (perfcounters + FAST_HYPERPRIVOP_PERFC_OFS + (4 * N))
+#  define FAST_HYPERPRIVOP_PERFC(N) PERFC(fast_hyperprivop + N)
 #  define FAST_REFLECT_CNT
 # endif
        
@@ -364,7 +363,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
        mov rp=r29;;
        mov cr.itm=r26;;        // ensure next tick
 #ifdef FAST_REFLECT_CNT
-       movl r20=perfcounters+FAST_REFLECT_PERFC_OFS+((0x3000>>8)*4);;
+       movl r20=PERFC(fast_reflect + (0x3000>>8));;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
@@ -597,7 +596,7 @@ END(fast_break_reflect)
 //     r31 == pr
 ENTRY(fast_reflect)
 #ifdef FAST_REFLECT_CNT
-       movl r22=perfcounters+FAST_REFLECT_PERFC_OFS;
+       movl r22=PERFC(fast_reflect);
        shr r23=r20,8-2;;
        add r22=r22,r23;;
        ld4 r21=[r22];;
@@ -938,7 +937,7 @@ fast_tlb_no_tr_match:
 (p7)   br.cond.spnt.few page_not_present;;
 
 #ifdef FAST_REFLECT_CNT
-       movl r21=perfcounter+FAST_VHPT_TRANSLATE_PERFC_OFS;;
+       movl r21=PERFC(fast_vhpt_translate);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
@@ -968,7 +967,7 @@ END(fast_tlb_miss_reflect)
 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
 ENTRY(recover_and_page_fault)
 #ifdef PERF_COUNTERS
-       movl r21=perfcounters + RECOVER_TO_PAGE_FAULT_PERFC_OFS;;
+       movl r21=PERFC(recover_to_page_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
@@ -1832,7 +1831,7 @@ END(hyper_ptc_ga)
 // recovery block for hyper_itc metaphysical memory lookup
 ENTRY(recover_and_dispatch_break_fault)
 #ifdef PERF_COUNTERS
-       movl r21=perfcounters + RECOVER_TO_BREAK_FAULT_PERFC_OFS;;
+       movl r21=PERFC(recover_to_break_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
Index: 2007-03-19/xen/arch/ia64/xen/privop_stat.c
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/xen/privop_stat.c     2006-08-31 
15:26:11.000000000 +0200
+++ 2007-03-19/xen/arch/ia64/xen/privop_stat.c  2007-03-27 16:37:00.000000000 
+0200
@@ -10,48 +10,39 @@ struct privop_addr_count {
        unsigned long addr[PRIVOP_COUNT_NADDRS];
        unsigned int count[PRIVOP_COUNT_NADDRS];
        unsigned int overflow;
-       atomic_t *perfc_addr;
-       atomic_t *perfc_count;
-       atomic_t *perfc_overflow;
 };
 
-#undef  PERFCOUNTER
-#define PERFCOUNTER(var, name)
-
-#undef  PERFCOUNTER_CPU
-#define PERFCOUNTER_CPU(var, name)
+struct privop_addr_info {
+       enum perfcounter perfc_addr;
+       enum perfcounter perfc_count;
+       enum perfcounter perfc_overflow;
+};
 
-#undef  PERFCOUNTER_ARRAY
+#define PERFCOUNTER(var, name)
 #define PERFCOUNTER_ARRAY(var, name, size)
 
-#undef  PERFSTATUS
 #define PERFSTATUS(var, name)
-
-#undef  PERFSTATUS_CPU
-#define PERFSTATUS_CPU(var, name)
-
-#undef  PERFSTATUS_ARRAY
 #define PERFSTATUS_ARRAY(var, name, size)
 
-#undef PERFPRIVOPADDR
 #define PERFPRIVOPADDR(name)                        \
     {                                               \
-        { 0 }, { 0 }, 0,                            \
-        perfcounters.privop_addr_##name##_addr,     \
-        perfcounters.privop_addr_##name##_count,    \
-        perfcounters.privop_addr_##name##_overflow  \
+        PERFC_privop_addr_##name##_addr,            \
+        PERFC_privop_addr_##name##_count,           \
+        PERFC_privop_addr_##name##_overflow         \
     },
 
-static struct privop_addr_count privop_addr_counter[] = {
+static const struct privop_addr_info privop_addr_info[] = {
 #include <asm/perfc_defn.h>
 };
 
 #define PRIVOP_COUNT_NINSTS \
-        (sizeof(privop_addr_counter) / sizeof(privop_addr_counter[0]))
+        (sizeof(privop_addr_info) / sizeof(privop_addr_info[0]))
+
+static DEFINE_PER_CPU(struct privop_addr_count[PRIVOP_COUNT_NINSTS], 
privop_addr_counter);
 
 void privop_count_addr(unsigned long iip, enum privop_inst inst)
 {
-       struct privop_addr_count *v = &privop_addr_counter[inst];
+       struct privop_addr_count *v = this_cpu(privop_addr_counter) + inst;
        int i;
 
        if (inst >= PRIVOP_COUNT_NINSTS)
@@ -72,31 +63,44 @@ void privop_count_addr(unsigned long iip
 
 void gather_privop_addrs(void)
 {
-       int i, j;
-       atomic_t *v;
-       for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
-               /* Note: addresses are truncated!  */
-               v = privop_addr_counter[i].perfc_addr;
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       atomic_set(&v[j], privop_addr_counter[i].addr[j]);
-
-               v = privop_addr_counter[i].perfc_count;
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       atomic_set(&v[j], privop_addr_counter[i].count[j]);
+       unsigned int cpu;
+
+       for_each_cpu ( cpu ) {
+               perfc_t *perfcounters = per_cpu(perfcounters, cpu);
+               struct privop_addr_count *s = per_cpu(privop_addr_counter, cpu);
+               int i, j;
+
+               for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, s++) {
+                       perfc_t *d;
+
+                       /* Note: addresses are truncated!  */
+                       d = perfcounters + privop_addr_info[i].perfc_addr;
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               d[j] = s->addr[j];
+
+                       d = perfcounters + privop_addr_info[i].perfc_count;
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               d[j] = s->count[j];
                
-               atomic_set(privop_addr_counter[i].perfc_overflow,
-                          privop_addr_counter[i].overflow);
+                       perfcounters[privop_addr_info[i].perfc_overflow] =
+                               s->overflow;
+               }
        }
 }
 
 void reset_privop_addrs(void)
 {
-       int i, j;
-       for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
-               struct privop_addr_count *v = &privop_addr_counter[i];
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       v->addr[j] = v->count[j] = 0;
-               v->overflow = 0;
+       unsigned int cpu;
+
+       for_each_cpu ( cpu ) {
+               struct privop_addr_count *v = per_cpu(privop_addr_counter, cpu);
+               int i, j;
+
+               for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, v++) {
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               v->addr[j] = v->count[j] = 0;
+                       v->overflow = 0;
+               }
        }
 }
 #endif
Index: 2007-03-19/xen/arch/ia64/xen/vhpt.c
===================================================================
--- 2007-03-19.orig/xen/arch/ia64/xen/vhpt.c    2007-02-12 14:00:54.000000000 
+0100
+++ 2007-03-19/xen/arch/ia64/xen/vhpt.c 2007-03-27 15:13:06.000000000 +0200
@@ -512,7 +512,7 @@ void gather_vhpt_stats(void)
                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
-               perfc_seta(vhpt_valid_entries, cpu, vhpt_valid);
+               per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = 
vhpt_valid;
        }
 }
 #endif
Index: 2007-03-19/xen/arch/x86/mm.c
===================================================================
--- 2007-03-19.orig/xen/arch/x86/mm.c   2007-03-27 10:31:15.000000000 +0200
+++ 2007-03-19/xen/arch/x86/mm.c        2007-03-27 10:32:56.000000000 +0200
@@ -1969,6 +1969,8 @@ int do_mmuext_op(
         if ( unlikely(!guest_handle_is_null(pdone)) )
             (void)copy_from_guest(&done, pdone, 1);
     }
+    else
+        perfc_incr(calls_to_mmuext_op);
 
     if ( unlikely(!guest_handle_okay(uops, count)) )
     {
@@ -2223,6 +2225,8 @@ int do_mmuext_op(
 
     UNLOCK_BIGLOCK(d);
 
+    perfc_add(num_mmuext_ops, i);
+
  out:
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(!guest_handle_is_null(pdone)) )
@@ -2257,6 +2261,8 @@ int do_mmu_update(
         if ( unlikely(!guest_handle_is_null(pdone)) )
             (void)copy_from_guest(&done, pdone, 1);
     }
+    else
+        perfc_incr(calls_to_mmu_update);
 
     if ( unlikely(!guest_handle_okay(ureqs, count)) )
     {
@@ -2273,9 +2279,6 @@ int do_mmu_update(
     domain_mmap_cache_init(&mapcache);
     domain_mmap_cache_init(&sh_mapcache);
 
-    perfc_incrc(calls_to_mmu_update);
-    perfc_addc(num_page_updates, count);
-
     LOCK_BIGLOCK(d);
 
     for ( i = 0; i < count; i++ )
@@ -2438,6 +2441,8 @@ int do_mmu_update(
     domain_mmap_cache_destroy(&mapcache);
     domain_mmap_cache_destroy(&sh_mapcache);
 
+    perfc_add(num_page_updates, i);
+
  out:
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(!guest_handle_is_null(pdone)) )
Index: 2007-03-19/xen/arch/x86/x86_32/asm-offsets.c
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_32/asm-offsets.c   2007-03-19 
13:23:52.000000000 +0100
+++ 2007-03-19/xen/arch/x86/x86_32/asm-offsets.c        2007-03-27 
11:16:21.000000000 +0200
@@ -107,21 +107,11 @@ void __dummy__(void)
     BLANK();
 
 #if PERF_COUNTERS
-    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
-    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
+    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
+    DEFINE(PERFC_exceptions, PERFC_exceptions);
     BLANK();
 #endif
 
-    OFFSET(MULTICALL_op, struct multicall_entry, op);
-    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
-    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
-    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
-    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
-    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
-    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
-    OFFSET(MULTICALL_result, struct multicall_entry, result);
-    BLANK();
-
     DEFINE(FIXMAP_apic_base, fix_to_virt(FIX_APIC_BASE));
     BLANK();
 
Index: 2007-03-19/xen/arch/x86/x86_32/entry.S
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_32/entry.S 2007-02-28 12:10:37.000000000 
+0100
+++ 2007-03-19/xen/arch/x86/x86_32/entry.S      2007-03-27 12:12:51.000000000 
+0200
@@ -173,7 +173,7 @@ ENTRY(hypercall)
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
         jae   bad_hypercall
-        PERFC_INCR(PERFC_hypercalls, %eax)
+        PERFC_INCR(PERFC_hypercalls, %eax, %ebx)
 #ifndef NDEBUG
         /* Create shadow parameters and corrupt those not used by this call. */
         pushl %eax
@@ -429,7 +429,7 @@ handle_exception:
         movl  %esp,%edx
         pushl %edx                      # push the cpu_user_regs pointer
         GET_CURRENT(%ebx)
-        PERFC_INCR(PERFC_exceptions, %eax)
+        PERFC_INCR(PERFC_exceptions, %eax, %ebx)
         call  *exception_table(,%eax,4)
         addl  $4,%esp
         movl  UREGS_eflags(%esp),%eax
Index: 2007-03-19/xen/arch/x86/x86_64/asm-offsets.c
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_64/asm-offsets.c   2007-03-19 
13:23:52.000000000 +0100
+++ 2007-03-19/xen/arch/x86/x86_64/asm-offsets.c        2007-03-27 
11:38:22.000000000 +0200
@@ -121,30 +121,8 @@ void __dummy__(void)
     BLANK();
 
 #if PERF_COUNTERS
-    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
-    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
-    BLANK();
-#endif
-
-    OFFSET(MULTICALL_op, struct multicall_entry, op);
-    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
-    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
-    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
-    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
-    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
-    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
-    OFFSET(MULTICALL_result, struct multicall_entry, result);
-    BLANK();
-
-#ifdef CONFIG_COMPAT
-    OFFSET(COMPAT_MULTICALL_op, struct compat_multicall_entry, op);
-    OFFSET(COMPAT_MULTICALL_arg0, struct compat_multicall_entry, args[0]);
-    OFFSET(COMPAT_MULTICALL_arg1, struct compat_multicall_entry, args[1]);
-    OFFSET(COMPAT_MULTICALL_arg2, struct compat_multicall_entry, args[2]);
-    OFFSET(COMPAT_MULTICALL_arg3, struct compat_multicall_entry, args[3]);
-    OFFSET(COMPAT_MULTICALL_arg4, struct compat_multicall_entry, args[4]);
-    OFFSET(COMPAT_MULTICALL_arg5, struct compat_multicall_entry, args[5]);
-    OFFSET(COMPAT_MULTICALL_result, struct compat_multicall_entry, result);
+    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
+    DEFINE(PERFC_exceptions, PERFC_exceptions);
     BLANK();
 #endif
 
Index: 2007-03-19/xen/arch/x86/x86_64/compat/entry.S
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_64/compat/entry.S  2007-03-19 
13:23:52.000000000 +0100
+++ 2007-03-19/xen/arch/x86/x86_64/compat/entry.S       2007-03-27 
12:12:29.000000000 +0200
@@ -57,7 +57,7 @@ ENTRY(compat_hypercall)
         movl  UREGS_rbx(%rsp),%edi   /* Arg 1        */
 #endif
         leaq  compat_hypercall_table(%rip),%r10
-        PERFC_INCR(PERFC_hypercalls, %rax)
+        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
         /* Deliberately corrupt parameter regs used by this hypercall. */
Index: 2007-03-19/xen/arch/x86/x86_64/entry.S
===================================================================
--- 2007-03-19.orig/xen/arch/x86/x86_64/entry.S 2007-02-28 12:10:32.000000000 
+0100
+++ 2007-03-19/xen/arch/x86/x86_64/entry.S      2007-03-27 12:11:33.000000000 
+0200
@@ -147,7 +147,7 @@ ENTRY(syscall_enter)
         pushq UREGS_rip+8(%rsp)
 #endif
         leaq  hypercall_table(%rip),%r10
-        PERFC_INCR(PERFC_hypercalls, %rax)
+        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
         /* Deliberately corrupt parameter regs used by this hypercall. */
@@ -396,7 +396,7 @@ ENTRY(handle_exception)
         movl  UREGS_entry_vector(%rsp),%eax
         leaq  exception_table(%rip),%rdx
         GET_CURRENT(%rbx)
-        PERFC_INCR(PERFC_exceptions, %rax)
+        PERFC_INCR(PERFC_exceptions, %rax, %rbx)
         callq *(%rdx,%rax,8)
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
Index: 2007-03-19/xen/common/multicall.c
===================================================================
--- 2007-03-19.orig/xen/common/multicall.c      2007-03-27 10:31:15.000000000 
+0200
+++ 2007-03-19/xen/common/multicall.c   2007-03-27 10:32:56.000000000 +0200
@@ -10,6 +10,7 @@
 #include <xen/event.h>
 #include <xen/multicall.h>
 #include <xen/guest_access.h>
+#include <xen/perfc.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 
@@ -69,14 +70,18 @@ do_multicall(
         guest_handle_add_offset(call_list, 1);
     }
 
+    perfc_incr(calls_to_multicall);
+    perfc_add(calls_from_multicall, nr_calls);
     mcs->flags = 0;
     return 0;
 
  fault:
+    perfc_incr(calls_to_multicall);
     mcs->flags = 0;
     return -EFAULT;
 
  preempted:
+    perfc_add(calls_from_multicall, i);
     mcs->flags = 0;
     return hypercall_create_continuation(
         __HYPERVISOR_multicall, "hi", call_list, nr_calls-i);
Index: 2007-03-19/xen/common/perfc.c
===================================================================
--- 2007-03-19.orig/xen/common/perfc.c  2007-03-27 10:31:15.000000000 +0200
+++ 2007-03-19/xen/common/perfc.c       2007-03-27 13:33:55.000000000 +0200
@@ -10,81 +10,98 @@
 #include <public/sysctl.h>
 #include <asm/perfc.h>
 
-#undef  PERFCOUNTER
-#undef  PERFCOUNTER_CPU
-#undef  PERFCOUNTER_ARRAY
-#undef  PERFSTATUS
-#undef  PERFSTATUS_CPU
-#undef  PERFSTATUS_ARRAY
 #define PERFCOUNTER( var, name )              { name, TYPE_SINGLE, 0 },
-#define PERFCOUNTER_CPU( var, name )          { name, TYPE_CPU,    0 },
 #define PERFCOUNTER_ARRAY( var, name, size )  { name, TYPE_ARRAY,  size },
 #define PERFSTATUS( var, name )               { name, TYPE_S_SINGLE, 0 },
-#define PERFSTATUS_CPU( var, name )           { name, TYPE_S_CPU,    0 },
 #define PERFSTATUS_ARRAY( var, name, size )   { name, TYPE_S_ARRAY,  size },
-static struct {
-    char *name;
-    enum { TYPE_SINGLE, TYPE_CPU, TYPE_ARRAY,
-           TYPE_S_SINGLE, TYPE_S_CPU, TYPE_S_ARRAY
+static const struct {
+    const char *name;
+    enum { TYPE_SINGLE, TYPE_ARRAY,
+           TYPE_S_SINGLE, TYPE_S_ARRAY
     } type;
-    int nr_elements;
+    unsigned int nr_elements;
 } perfc_info[] = {
 #include <xen/perfc_defn.h>
 };
 
 #define NR_PERFCTRS (sizeof(perfc_info) / sizeof(perfc_info[0]))
 
-struct perfcounter perfcounters;
+DEFINE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
 
 void perfc_printall(unsigned char key)
 {
-    unsigned int i, j, sum;
+    unsigned int i, j;
     s_time_t now = NOW();
-    atomic_t *counters = (atomic_t *)&perfcounters;
 
     printk("Xen performance counters SHOW  (now = 0x%08X:%08X)\n",
            (u32)(now>>32), (u32)now);
 
-    for ( i = 0; i < NR_PERFCTRS; i++ ) 
+    for ( i = j = 0; i < NR_PERFCTRS; i++ )
     {
+        unsigned int k, cpu;
+        unsigned long long sum = 0;
+
         printk("%-32s  ",  perfc_info[i].name);
         switch ( perfc_info[i].type )
         {
         case TYPE_SINGLE:
         case TYPE_S_SINGLE:
-            printk("TOTAL[%10d]", atomic_read(&counters[0]));
-            counters += 1;
-            break;
-        case TYPE_CPU:
-        case TYPE_S_CPU:
-            sum = 0;
-            for_each_online_cpu ( j )
-                sum += atomic_read(&counters[j]);
-            printk("TOTAL[%10u]", sum);
-            if (sum)
+            for_each_online_cpu ( cpu )
+                sum += per_cpu(perfcounters, cpu)[j];
+            printk("TOTAL[%12Lu]", sum);
+            if ( sum )
             {
-                for_each_online_cpu ( j )
-                    printk("  CPU%02d[%10d]", j, atomic_read(&counters[j]));
+                k = 0;
+                for_each_online_cpu ( cpu )
+                {
+                    if ( k > 0 && (k % 4) == 0 )
+                        printk("\n%46s", "");
+                    printk("  CPU%02u[%10"PRIperfc"u]", cpu, 
per_cpu(perfcounters, cpu)[j]);
+                    ++k;
+                }
             }
-            counters += NR_CPUS;
+            ++j;
             break;
         case TYPE_ARRAY:
         case TYPE_S_ARRAY:
-            for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
-                sum += atomic_read(&counters[j]);
-            printk("TOTAL[%10u]", sum);
-#ifdef PERF_ARRAYS
+            for_each_online_cpu ( cpu )
+            {
+                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+
+                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
+                    sum += counters[k];
+            }
+            printk("TOTAL[%12Lu]", sum);
             if (sum)
             {
-                for ( j = 0; j < perfc_info[i].nr_elements; j++ )
+#ifdef PERF_ARRAYS
+                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
                 {
-                    if ( (j % 4) == 0 )
-                        printk("\n                 ");
-                    printk("  ARR%02d[%10d]", j, atomic_read(&counters[j]));
+                    sum = 0;
+                    for_each_online_cpu ( cpu )
+                        sum += per_cpu(perfcounters, cpu)[j + k];
+                    if ( (k % 4) == 0 )
+                        printk("\n%16s", "");
+                    printk("  ARR%02u[%10Lu]", k, sum);
+                }
+#else
+                k = 0;
+                for_each_online_cpu ( cpu )
+                {
+                    perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+                    unsigned int n;
+
+                    sum = 0;
+                    for ( n = 0; n < perfc_info[i].nr_elements; n++ )
+                        sum += counters[n];
+                    if ( k > 0 && (k % 4) == 0 )
+                        printk("\n%46s", "");
+                    printk("  CPU%02u[%10Lu]", cpu, sum);
+                    ++k;
                 }
-            }
 #endif
-            counters += j;
+            }
+            j += perfc_info[i].nr_elements;
             break;
         }
         printk("\n");
@@ -97,7 +114,6 @@ void perfc_reset(unsigned char key)
 {
     unsigned int i, j;
     s_time_t now = NOW();
-    atomic_t *counters = (atomic_t *)&perfcounters;
 
     if ( key != '\0' )
         printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
@@ -105,43 +121,39 @@ void perfc_reset(unsigned char key)
 
     /* leave STATUS counters alone -- don't reset */
 
-    for ( i = 0; i < NR_PERFCTRS; i++ ) 
+    for ( i = j = 0; i < NR_PERFCTRS; i++ )
     {
+        unsigned int cpu;
+
         switch ( perfc_info[i].type )
         {
         case TYPE_SINGLE:
-            atomic_set(&counters[0],0);
+            for_each_cpu ( cpu )
+                per_cpu(perfcounters, cpu)[j] = 0;
         case TYPE_S_SINGLE:
-            counters += 1;
-            break;
-        case TYPE_CPU:
-            for ( j = 0; j < NR_CPUS; j++ )
-                atomic_set(&counters[j],0);
-        case TYPE_S_CPU:
-            counters += NR_CPUS;
+            ++j;
             break;
         case TYPE_ARRAY:
-            for ( j = 0; j < perfc_info[i].nr_elements; j++ )
-                atomic_set(&counters[j],0);
+            for_each_cpu ( cpu )
+                memset(per_cpu(perfcounters, cpu) + j, 0,
+                       perfc_info[i].nr_elements * sizeof(perfc_t));
         case TYPE_S_ARRAY:
-            counters += perfc_info[i].nr_elements;
+            j += perfc_info[i].nr_elements;
             break;
         }
     }
 
-    arch_perfc_reset ();
+    arch_perfc_reset();
 }
 
 static xen_sysctl_perfc_desc_t perfc_d[NR_PERFCTRS];
 static xen_sysctl_perfc_val_t *perfc_vals;
-static int               perfc_nbr_vals;
+static unsigned int      perfc_nbr_vals;
 static int               perfc_init = 0;
 static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
                            XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
 {
-    unsigned int i, j;
-    unsigned int v = 0;
-    atomic_t *counters = (atomic_t *)&perfcounters;
+    unsigned int i, j, v;
 
     /* We only copy the name and array-size information once. */
     if ( !perfc_init ) 
@@ -154,11 +166,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
             {
             case TYPE_SINGLE:
             case TYPE_S_SINGLE:
-                perfc_d[i].nr_vals = 1;
-                break;
-            case TYPE_CPU:
-            case TYPE_S_CPU:
-                perfc_d[i].nr_vals = num_online_cpus();
+                perfc_d[i].nr_vals = num_possible_cpus();
                 break;
             case TYPE_ARRAY:
             case TYPE_S_ARRAY:
@@ -181,26 +189,31 @@ static int perfc_copy_info(XEN_GUEST_HAN
     arch_perfc_gather();
 
     /* We gather the counts together every time. */
-    for ( i = 0; i < NR_PERFCTRS; i++ )
+    for ( i = j = v = 0; i < NR_PERFCTRS; i++ )
     {
+        unsigned int cpu;
+
         switch ( perfc_info[i].type )
         {
         case TYPE_SINGLE:
         case TYPE_S_SINGLE:
-            perfc_vals[v++] = atomic_read(&counters[0]);
-            counters += 1;
-            break;
-        case TYPE_CPU:
-        case TYPE_S_CPU:
-            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
-                perfc_vals[v++] = atomic_read(&counters[j]);
-            counters += NR_CPUS;
+            for_each_cpu ( cpu )
+                perfc_vals[v++] = per_cpu(perfcounters, cpu)[j];
+            ++j;
             break;
         case TYPE_ARRAY:
         case TYPE_S_ARRAY:
-            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
-                perfc_vals[v++] = atomic_read(&counters[j]);
-            counters += perfc_info[i].nr_elements;
+            memset(perfc_vals + v, 0, perfc_d[i].nr_vals * 
sizeof(*perfc_vals));
+            for_each_cpu ( cpu )
+            {
+                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+                unsigned int k;
+
+                for ( k = 0; k < perfc_d[i].nr_vals; k++ )
+                    perfc_vals[v + k] += counters[k];
+            }
+            v += perfc_d[i].nr_vals;
+            j += perfc_info[i].nr_elements;
             break;
         }
     }
@@ -224,14 +237,12 @@ int perfc_control(xen_sysctl_perfc_op_t 
     switch ( pc->cmd )
     {
     case XEN_SYSCTL_PERFCOP_reset:
-        perfc_copy_info(pc->desc, pc->val);
+        rc = perfc_copy_info(pc->desc, pc->val);
         perfc_reset(0);
-        rc = 0;
         break;
 
     case XEN_SYSCTL_PERFCOP_query:
-        perfc_copy_info(pc->desc, pc->val);
-        rc = 0;
+        rc = perfc_copy_info(pc->desc, pc->val);
         break;
 
     default:
Index: 2007-03-19/xen/include/asm-ia64/linux-xen/asm/asmmacro.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-ia64/linux-xen/asm/asmmacro.h       
2006-07-31 13:57:56.000000000 +0200
+++ 2007-03-19/xen/include/asm-ia64/linux-xen/asm/asmmacro.h    2007-03-27 
16:32:49.000000000 +0200
@@ -116,4 +116,8 @@ name:
 # define dv_serialize_instruction
 #endif
 
+#ifdef PERF_COUNTERS
+#define PERFC(n) (THIS_CPU(perfcounters) + (IA64_PERFC_ ## n) * 4)
+#endif
+
 #endif /* _ASM_IA64_ASMMACRO_H */
Index: 2007-03-19/xen/include/asm-ia64/perfc_defn.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-ia64/perfc_defn.h   2006-11-15 
11:50:51.000000000 +0100
+++ 2007-03-19/xen/include/asm-ia64/perfc_defn.h        2007-03-27 
16:56:53.000000000 +0200
@@ -84,7 +84,7 @@ PERFCOUNTER_ARRAY(slow_reflect,       "s
 PERFCOUNTER_ARRAY(fast_reflect,       "fast reflection", 0x80)
 
 PERFSTATUS(vhpt_nbr_entries,          "nbr of entries per VHPT")
-PERFSTATUS_CPU(vhpt_valid_entries,    "nbr of valid entries in VHPT")
+PERFSTATUS(vhpt_valid_entries,        "nbr of valid entries in VHPT")
 
 PERFCOUNTER_ARRAY(vmx_mmio_access,    "vmx_mmio_access", 8)
 PERFCOUNTER_CPU(vmx_pal_emul,         "vmx_pal_emul")
@@ -106,6 +106,8 @@ PERFSTATUS(privop_addr_##name##_overflow
 
 PERFPRIVOPADDR(get_ifa)
 PERFPRIVOPADDR(thash)
+
+#undef PERFPRIVOPADDR
 #endif
 
 // vhpt.c
Index: 2007-03-19/xen/include/asm-ia64/privop_stat.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-ia64/privop_stat.h  2006-08-31 
15:26:11.000000000 +0200
+++ 2007-03-19/xen/include/asm-ia64/privop_stat.h       2007-03-27 
14:34:04.000000000 +0200
@@ -1,5 +1,5 @@
-#ifndef _XEN_UA64_PRIVOP_STAT_H
-#define _XEN_UA64_PRIVOP_STAT_H
+#ifndef _XEN_IA64_PRIVOP_STAT_H
+#define _XEN_IA64_PRIVOP_STAT_H
 #include <asm/config.h>
 #include <xen/types.h>
 #include <public/xen.h>
@@ -9,31 +9,24 @@
 extern void gather_privop_addrs(void);
 extern void reset_privop_addrs(void);
 
-#undef  PERFCOUNTER
 #define PERFCOUNTER(var, name)
-
-#undef  PERFCOUNTER_CPU
-#define PERFCOUNTER_CPU(var, name)
-
-#undef  PERFCOUNTER_ARRAY
 #define PERFCOUNTER_ARRAY(var, name, size)
 
-#undef  PERFSTATUS
 #define PERFSTATUS(var, name)
-
-#undef  PERFSTATUS_CPU
-#define PERFSTATUS_CPU(var, name)
-
-#undef  PERFSTATUS_ARRAY
 #define PERFSTATUS_ARRAY(var, name, size)
 
-#undef  PERFPRIVOPADDR
 #define PERFPRIVOPADDR(name) privop_inst_##name,
 
 enum privop_inst {
 #include <asm/perfc_defn.h>
 };
 
+#undef PERFCOUNTER
+#undef PERFCOUNTER_ARRAY
+
+#undef PERFSTATUS
+#undef PERFSTATUS_ARRAY
+
 #undef PERFPRIVOPADDR
 
 #define        PRIVOP_COUNT_ADDR(regs,inst) 
privop_count_addr(regs->cr_iip,inst)
@@ -45,4 +38,4 @@ extern void privop_count_addr(unsigned l
 #define reset_privop_addrs() do {} while (0)
 #endif
 
-#endif /* _XEN_UA64_PRIVOP_STAT_H */
+#endif /* _XEN_IA64_PRIVOP_STAT_H */
Index: 2007-03-19/xen/include/asm-x86/multicall.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/multicall.h     2007-01-08 
14:15:32.000000000 +0100
+++ 2007-03-19/xen/include/asm-x86/multicall.h  2007-03-27 11:48:14.000000000 
+0200
@@ -6,84 +6,94 @@
 #define __ASM_X86_MULTICALL_H__
 
 #include <xen/errno.h>
-#include <asm/asm_defns.h>
 
 #ifdef __x86_64__
 
 #define do_multicall_call(_call)                             \
     do {                                                     \
         __asm__ __volatile__ (                               \
-            "    movq  "STR(MULTICALL_op)"(%0),%%rax; "      \
+            "    movq  %c1(%0),%%rax; "                      \
+            "    leaq  hypercall_table(%%rip),%%rdi; "       \
             "    cmpq  $("STR(NR_hypercalls)"),%%rax; "      \
             "    jae   2f; "                                 \
-            "    leaq  hypercall_table(%%rip),%%rdi; "       \
-            "    leaq  (%%rdi,%%rax,8),%%rax; "              \
-            "    movq  "STR(MULTICALL_arg0)"(%0),%%rdi; "    \
-            "    movq  "STR(MULTICALL_arg1)"(%0),%%rsi; "    \
-            "    movq  "STR(MULTICALL_arg2)"(%0),%%rdx; "    \
-            "    movq  "STR(MULTICALL_arg3)"(%0),%%rcx; "    \
-            "    movq  "STR(MULTICALL_arg4)"(%0),%%r8; "     \
-            "    callq *(%%rax); "                           \
-            "1:  movq  %%rax,"STR(MULTICALL_result)"(%0)\n"  \
+            "    movq  (%%rdi,%%rax,8),%%rax; "              \
+            "    movq  %c2+0*%c3(%0),%%rdi; "                \
+            "    movq  %c2+1*%c3(%0),%%rsi; "                \
+            "    movq  %c2+2*%c3(%0),%%rdx; "                \
+            "    movq  %c2+3*%c3(%0),%%rcx; "                \
+            "    movq  %c2+4*%c3(%0),%%r8; "                 \
+            "    callq *%%rax; "                             \
+            "1:  movq  %%rax,%c4(%0)\n"                      \
             ".section .fixup,\"ax\"\n"                       \
             "2:  movq  $-"STR(ENOSYS)",%%rax\n"              \
             "    jmp   1b\n"                                 \
             ".previous\n"                                    \
-            : : "b" (_call)                                  \
+            :                                                \
+            : "b" (_call),                                   \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
               /* all the caller-saves registers */           \
             : "rax", "rcx", "rdx", "rsi", "rdi",             \
               "r8",  "r9",  "r10", "r11" );                  \
     } while ( 0 )
 
-#define compat_multicall_call(_call)                              \
-    do {                                                          \
-        __asm__ __volatile__ (                                    \
-            "    movl  "STR(COMPAT_MULTICALL_op)"(%0),%%eax; "    \
-            "    leaq  compat_hypercall_table(%%rip),%%rdi; "     \
-            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "           \
-            "    jae   2f; "                                      \
-            "    movq  (%%rdi,%%rax,8),%%rax; "                   \
-            "    movl  "STR(COMPAT_MULTICALL_arg0)"(%0),%%edi; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg1)"(%0),%%esi; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg2)"(%0),%%edx; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg3)"(%0),%%ecx; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg4)"(%0),%%r8d; "  \
-            "    callq *%%rax; "                                  \
-            "1:  movl  %%eax,"STR(COMPAT_MULTICALL_result)"(%0)\n"\
-            ".section .fixup,\"ax\"\n"                            \
-            "2:  movl  $-"STR(ENOSYS)",%%eax\n"                   \
-            "    jmp   1b\n"                                      \
-            ".previous\n"                                         \
-            : : "b" (_call)                                       \
-              /* all the caller-saves registers */                \
-            : "rax", "rcx", "rdx", "rsi", "rdi",                  \
-              "r8",  "r9",  "r10", "r11" );                       \
-    } while ( 0 )
+#define compat_multicall_call(_call)                         \
+        __asm__ __volatile__ (                               \
+            "    movl  %c1(%0),%%eax; "                      \
+            "    leaq  compat_hypercall_table(%%rip),%%rdi; "\
+            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
+            "    jae   2f; "                                 \
+            "    movq  (%%rdi,%%rax,8),%%rax; "              \
+            "    movl  %c2+0*%c3(%0),%%edi; "                \
+            "    movl  %c2+1*%c3(%0),%%esi; "                \
+            "    movl  %c2+2*%c3(%0),%%edx; "                \
+            "    movl  %c2+3*%c3(%0),%%ecx; "                \
+            "    movl  %c2+4*%c3(%0),%%r8d; "                \
+            "    callq *%%rax; "                             \
+            "1:  movl  %%eax,%c4(%0)\n"                      \
+            ".section .fixup,\"ax\"\n"                       \
+            "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
+            "    jmp   1b\n"                                 \
+            ".previous\n"                                    \
+            :                                                \
+            : "b" (_call),                                   \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
+              /* all the caller-saves registers */           \
+            : "rax", "rcx", "rdx", "rsi", "rdi",             \
+              "r8",  "r9",  "r10", "r11" )                   \
 
 #else
 
 #define do_multicall_call(_call)                             \
-    do {                                                     \
         __asm__ __volatile__ (                               \
-            "    pushl "STR(MULTICALL_arg4)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg3)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg2)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg1)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg0)"(%0); "          \
-            "    movl  "STR(MULTICALL_op)"(%0),%%eax; "      \
+            "    movl  %c1(%0),%%eax; "                      \
+            "    pushl %c2+4*%c3(%0); "                      \
+            "    pushl %c2+3*%c3(%0); "                      \
+            "    pushl %c2+2*%c3(%0); "                      \
+            "    pushl %c2+1*%c3(%0); "                      \
+            "    pushl %c2+0*%c3(%0); "                      \
             "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
             "    jae   2f; "                                 \
             "    call  *hypercall_table(,%%eax,4); "         \
-            "1:  movl  %%eax,"STR(MULTICALL_result)"(%0); "  \
+            "1:  movl  %%eax,%c4(%0); "                      \
             "    addl  $20,%%esp\n"                          \
             ".section .fixup,\"ax\"\n"                       \
             "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
             "    jmp   1b\n"                                 \
             ".previous\n"                                    \
-            : : "b" (_call)                                  \
+            :                                                \
+            : "bSD" (_call),                                 \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
               /* all the caller-saves registers */           \
-            : "eax", "ecx", "edx" );                         \
-    } while ( 0 )
+            : "eax", "ecx", "edx" )                          \
 
 #endif
 
Index: 2007-03-19/xen/include/asm-x86/perfc_defn.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/perfc_defn.h    2007-03-27 10:31:15.000000000 +0200
+++ 2007-03-19/xen/include/asm-x86/perfc_defn.h 2007-03-27 10:32:56.000000000 +0200
@@ -18,9 +18,11 @@ PERFCOUNTER_CPU(apic_timer,             
 
 PERFCOUNTER_CPU(domain_page_tlb_flush,  "domain page tlb flushes")
 
-PERFCOUNTER_CPU(calls_to_mmu_update,    "calls_to_mmu_update")
-PERFCOUNTER_CPU(num_page_updates,       "num_page_updates")
-PERFCOUNTER_CPU(calls_to_update_va,     "calls_to_update_va_map")
+PERFCOUNTER(calls_to_mmuext_op,         "calls to mmuext_op")
+PERFCOUNTER(num_mmuext_ops,             "mmuext ops")
+PERFCOUNTER(calls_to_mmu_update,        "calls to mmu_update")
+PERFCOUNTER(num_page_updates,           "page updates")
+PERFCOUNTER(calls_to_update_va,         "calls to update_va_map")
 PERFCOUNTER_CPU(page_faults,            "page faults")
 PERFCOUNTER_CPU(copy_user_faults,       "copy_user faults")
 
Index: 2007-03-19/xen/include/asm-x86/x86_32/asm_defns.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/x86_32/asm_defns.h      2006-02-28 17:35:12.000000000 +0100
+++ 2007-03-19/xen/include/asm-x86/x86_32/asm_defns.h   2007-03-27 12:19:53.000000000 +0200
@@ -1,6 +1,8 @@
 #ifndef __X86_32_ASM_DEFNS_H__
 #define __X86_32_ASM_DEFNS_H__
 
+#include <asm/percpu.h>
+
 #ifndef NDEBUG
 /* Indicate special exception stack frame by inverting the frame pointer. */
 #define SETUP_EXCEPTION_FRAME_POINTER           \
@@ -47,10 +49,14 @@
         1:
 
 #ifdef PERF_COUNTERS
-#define PERFC_INCR(_name,_idx)                          \
-        lock incl perfcounters+_name(,_idx,4)
+#define PERFC_INCR(_name,_idx,_cur)                     \
+        pushl _cur;                                     \
+        movl VCPU_processor(_cur),_cur;                 \
+        shll $PERCPU_SHIFT,_cur;                        \
+        incl per_cpu__perfcounters+_name*4(_cur,_idx,4);\
+        popl _cur
 #else
-#define PERFC_INCR(_name,_idx)
+#define PERFC_INCR(_name,_idx,_cur)
 #endif
 
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
Index: 2007-03-19/xen/include/asm-x86/x86_64/asm_defns.h
===================================================================
--- 2007-03-19.orig/xen/include/asm-x86/x86_64/asm_defns.h      2006-10-23 08:33:08.000000000 +0200
+++ 2007-03-19/xen/include/asm-x86/x86_64/asm_defns.h   2007-03-27 13:16:39.000000000 +0200
@@ -1,6 +1,8 @@
 #ifndef __X86_64_ASM_DEFNS_H__
 #define __X86_64_ASM_DEFNS_H__
 
+#include <asm/percpu.h>
+
 #ifndef NDEBUG
 /* Indicate special exception stack frame by inverting the frame pointer. */
 #define SETUP_EXCEPTION_FRAME_POINTER           \
@@ -47,13 +49,18 @@
         popq  %rdi;
 
 #ifdef PERF_COUNTERS
-#define PERFC_INCR(_name,_idx)                  \
-    pushq %rdx;                                 \
-    leaq perfcounters+_name(%rip),%rdx;         \
-    lock incl (%rdx,_idx,4);                    \
-    popq %rdx;
+#define PERFC_INCR(_name,_idx,_cur)             \
+        pushq _cur;                             \
+        movslq VCPU_processor(_cur),_cur;       \
+        pushq %rdx;                             \
+        leaq per_cpu__perfcounters(%rip),%rdx;  \
+        shlq $PERCPU_SHIFT,_cur;                \
+        addq %rdx,_cur;                         \
+        popq %rdx;                              \
+        incl _name*4(_cur,_idx,4);              \
+        popq _cur
 #else
-#define PERFC_INCR(_name,_idx)
+#define PERFC_INCR(_name,_idx,_cur)
 #endif
 
 /* Work around AMD erratum #88 */
Index: 2007-03-19/xen/include/xen/perfc.h
===================================================================
--- 2007-03-19.orig/xen/include/xen/perfc.h     2007-03-27 10:31:15.000000000 +0200
+++ 2007-03-19/xen/include/xen/perfc.h  2007-03-27 16:57:00.000000000 +0200
@@ -6,102 +6,94 @@
 
 #include <xen/lib.h>
 #include <xen/smp.h>
-#include <asm/atomic.h>
+#include <xen/percpu.h>
 
 /* 
  * NOTE: new counters must be defined in perfc_defn.h
  * 
  * PERFCOUNTER (counter, string)              define a new performance counter
- * PERFCOUNTER_CPU (counter, string, size)    define a counter per CPU
- * PERFCOUNTER_ARRY (counter, string, size)   define an array of counters
+ * PERFCOUNTER_ARRAY (counter, string, size)  define an array of counters
  * 
  * unlike "COUNTERS", "STATUS" variables DO NOT RESET
  * PERFSTATUS (counter, string)               define a new performance stauts
- * PERFSTATUS_CPU (counter, string, size)     define a status var per CPU
- * PERFSTATUS_ARRY (counter, string, size)    define an array of status vars
+ * PERFSTATUS_ARRAY (counter, string, size)   define an array of status vars
  * 
  * unsigned long perfc_value  (counter)        get value of a counter  
- * unsigned long perfc_valuec (counter)        get value of a per CPU counter
  * unsigned long perfc_valuea (counter, index) get value of an array counter
  * unsigned long perfc_set  (counter, val)     set value of a counter  
- * unsigned long perfc_setc (counter, val)     set value of a per CPU counter
  * unsigned long perfc_seta (counter, index, val) set value of an array counter
  * void perfc_incr  (counter)                  increment a counter          
- * void perfc_incrc (counter, index)           increment a per CPU counter   
+ * void perfc_decr  (counter)                  decrement a status
  * void perfc_incra (counter, index)           increment an array counter   
  * void perfc_add   (counter, value)           add a value to a counter     
- * void perfc_addc  (counter, value)           add a value to a per CPU counter
  * void perfc_adda  (counter, index, value)    add a value to array counter 
  * void perfc_print (counter)                  print out the counter
  */
 
-#define PERFCOUNTER( var, name ) \
-  atomic_t var[1];
-#define PERFCOUNTER_CPU( var, name ) \
-  atomic_t var[NR_CPUS];
-#define PERFCOUNTER_ARRAY( var, name, size ) \
-  atomic_t var[size];
-#define PERFSTATUS( var, name ) \
-  atomic_t var[1];
-#define PERFSTATUS_CPU( var, name ) \
-  atomic_t var[NR_CPUS];
-#define PERFSTATUS_ARRAY( var, name, size ) \
-  atomic_t var[size];
+#define PERFCOUNTER( name, descr ) \
+  PERFC_ ## name,
+#define PERFCOUNTER_ARRAY( name, descr, size ) \
+  PERFC_ ## name,                              \
+  PERFC_LAST_ ## name = PERFC_ ## name + (size) - sizeof(char[2 * !!(size) - 1]),
 
-struct perfcounter {
+#define PERFSTATUS       PERFCOUNTER
+#define PERFSTATUS_ARRAY PERFCOUNTER_ARRAY
+
+/* Compatibility: This should go away once all users got converted. */
+#define PERFCOUNTER_CPU PERFCOUNTER
+
+enum perfcounter {
 #include <xen/perfc_defn.h>
+       NUM_PERFCOUNTERS
 };
 
-extern struct perfcounter perfcounters;
+#undef PERFCOUNTER
+#undef PERFCOUNTER_ARRAY
+#undef PERFSTATUS
+#undef PERFSTATUS_ARRAY
+
+typedef unsigned perfc_t;
+#define PRIperfc ""
 
-#define perfc_value(x)    atomic_read(&perfcounters.x[0])
-#define perfc_valuec(x)   atomic_read(&perfcounters.x[smp_processor_id()])
+DECLARE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
+
+#define perfc_value(x)    this_cpu(perfcounters)[PERFC_ ## x]
 #define perfc_valuea(x,y)                                               \
-    ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ?       \
-       atomic_read(&perfcounters.x[y]) : 0 )
-#define perfc_set(x,v)    atomic_set(&perfcounters.x[0], v)
-#define perfc_setc(x,v)   atomic_set(&perfcounters.x[smp_processor_id()], v)
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
+#define perfc_set(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] = (v))
 #define perfc_seta(x,y,v)                                               \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_set(&perfcounters.x[y], v);                          \
-    } while ( 0 )
-#define perfc_incr(x)     atomic_inc(&perfcounters.x[0])
-#define perfc_decr(x)     atomic_dec(&perfcounters.x[0])
-#define perfc_incrc(x)    atomic_inc(&perfcounters.x[smp_processor_id()])
-#define perfc_decrc(x)    atomic_dec(&perfcounters.x[smp_processor_id()])
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] = (v) : (v) )
+#define perfc_incr(x)     (++this_cpu(perfcounters)[PERFC_ ## x])
+#define perfc_decr(x)     (--this_cpu(perfcounters)[PERFC_ ## x])
 #define perfc_incra(x,y)                                                \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_inc(&perfcounters.x[y]);                             \
-    } while ( 0 )
-#define perfc_add(x,y)    atomic_add((y), &perfcounters.x[0])
-#define perfc_addc(x,y)   atomic_add((y), &perfcounters.x[smp_processor_id()])
-#define perfc_adda(x,y,z)                                               \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_add((z), &perfcounters.x[y]);                        \
-    } while ( 0 )
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        ++this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
+#define perfc_add(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] += (v))
+#define perfc_adda(x,y,v)                                               \
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] += (v) : (v) )
 
 /*
  * Histogram: special treatment for 0 and 1 count. After that equally spaced 
  * with last bucket taking the rest.
  */
 #ifdef PERF_ARRAYS
-#define perfc_incr_histo(_x,_v,_n)                                          \
-    do {                                                                    \
-        if ( (_v) == 0 )                                                    \
-            perfc_incra(_x, 0);                                             \
-        else if ( (_v) == 1 )                                               \
-            perfc_incra(_x, 1);                                             \
-        else if ( (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) <               \
-                  (PERFC_MAX_ ## _n - 3) )                                  \
-            perfc_incra(_x, (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) + 2); \
-        else                                                                \
-            perfc_incra(_x, PERFC_MAX_ ## _n - 1);                          \
+#define perfc_incr_histo(x,v)                                           \
+    do {                                                                \
+        if ( (v) == 0 )                                                 \
+            perfc_incra(x, 0);                                          \
+        else if ( (v) == 1 )                                            \
+            perfc_incra(x, 1);                                          \
+        else if ( (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) <           \
+                  (PERFC_LAST_ ## x - PERFC_ ## x - 2) )                \
+            perfc_incra(x, (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) + 2); \
+        else                                                            \
+            perfc_incra(x, PERFC_LAST_ ## x - PERFC_ ## x);             \
     } while ( 0 )
 #else
-#define perfc_incr_histo(_x,_v,_n) ((void)0)
+#define perfc_incr_histo(x,v) ((void)0)
 #endif
 
 struct xen_sysctl_perfc_op;
@@ -110,22 +102,20 @@ int perfc_control(struct xen_sysctl_perf
 #else /* PERF_COUNTERS */
 
 #define perfc_value(x)    (0)
-#define perfc_valuec(x)   (0)
 #define perfc_valuea(x,y) (0)
 #define perfc_set(x,v)    ((void)0)
-#define perfc_setc(x,v)   ((void)0)
 #define perfc_seta(x,y,v) ((void)0)
 #define perfc_incr(x)     ((void)0)
 #define perfc_decr(x)     ((void)0)
-#define perfc_incrc(x)    ((void)0)
-#define perfc_decrc(x)    ((void)0)
 #define perfc_incra(x,y)  ((void)0)
 #define perfc_decra(x,y)  ((void)0)
 #define perfc_add(x,y)    ((void)0)
-#define perfc_addc(x,y)   ((void)0)
 #define perfc_adda(x,y,z) ((void)0)
 #define perfc_incr_histo(x,y,z) ((void)0)
 
 #endif /* PERF_COUNTERS */
 
+/* Compatibility: This should go away once all users got converted. */
+#define perfc_incrc     perfc_incr
+
 #endif /* __XEN_PERFC_H__ */
Index: 2007-03-19/xen/include/xen/perfc_defn.h
===================================================================
--- 2007-03-19.orig/xen/include/xen/perfc_defn.h        2007-03-27 10:31:15.000000000 +0200
+++ 2007-03-19/xen/include/xen/perfc_defn.h     2007-03-27 10:32:56.000000000 +0200
@@ -6,6 +6,9 @@
 
 PERFCOUNTER_ARRAY(hypercalls,           "hypercalls", NR_hypercalls)
 
+PERFCOUNTER(calls_to_multicall,         "calls to multicall")
+PERFCOUNTER(calls_from_multicall,       "calls from multicall")
+
 PERFCOUNTER_CPU(irqs,                   "#interrupts")
 PERFCOUNTER_CPU(ipis,                   "#IPIs")
 



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] make all performance counter per-cpu, Jan Beulich <=