WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] xen: Make all performance counter per-cpu

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] xen: Make all performance counter per-cpu, avoiding the need to update
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 27 Mar 2007 16:10:32 -0700
Delivery-date: Tue, 27 Mar 2007 16:48:59 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kfraser@xxxxxxxxxxxxxxxxxxxxx
# Date 1175009737 -3600
# Node ID 96f167771979734080bb0f45086f1d4f699fa50d
# Parent  3375391fb0c901f4d3edb73c02a1169b6bd3d7b4
xen: Make all performance counter per-cpu, avoiding the need to update
them with atomic (locked) ops.

Conversion here isn't complete in the sense that many places still use
the old per-CPU accessors (which are now redundant). Since the patch
is already rather big, I'd prefer replacing those in a subsequent
patch.

While doing this, I also converted x86's multicall macros to no longer
require inclusion of asm-offsets.h in the respective C file (on IA64
the use of asm-offsets.h in C sources seems more wide spread, hence
there I rather used IA64_ prefixes for the otherwise conflicting
performance counter indices).

On x86, a few counter increments get moved a little, to avoid
duplicate counting of preempted hypercalls.

Also, a few counters are being added.

IA64 changes only compile-tested, hence somebody doing active IA64
work may want to have a close look at those changes.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>
---
 xen/arch/ia64/asm-offsets.c                   |    9 -
 xen/arch/ia64/xen/hyperprivop.S               |   13 -
 xen/arch/ia64/xen/privop_stat.c               |   86 +++++-----
 xen/arch/ia64/xen/vhpt.c                      |    8 
 xen/arch/x86/mm.c                             |   11 -
 xen/arch/x86/x86_32/asm-offsets.c             |   14 -
 xen/arch/x86/x86_32/entry.S                   |    4 
 xen/arch/x86/x86_64/asm-offsets.c             |   26 ---
 xen/arch/x86/x86_64/compat/entry.S            |    2 
 xen/arch/x86/x86_64/entry.S                   |    4 
 xen/common/multicall.c                        |    5 
 xen/common/perfc.c                            |  223 +++++++++++++-------------
 xen/include/asm-ia64/linux-xen/asm/asmmacro.h |    4 
 xen/include/asm-ia64/perfc_defn.h             |    4 
 xen/include/asm-ia64/privop_stat.h            |   25 +-
 xen/include/asm-x86/multicall.h               |  104 ++++++------
 xen/include/asm-x86/perfc_defn.h              |    8 
 xen/include/asm-x86/x86_32/asm_defns.h        |   12 +
 xen/include/asm-x86/x86_64/asm_defns.h        |   19 +-
 xen/include/xen/perfc.h                       |  120 ++++++-------
 xen/include/xen/perfc_defn.h                  |    3 
 21 files changed, 357 insertions(+), 347 deletions(-)

diff -r 3375391fb0c9 -r 96f167771979 xen/arch/ia64/asm-offsets.c
--- a/xen/arch/ia64/asm-offsets.c       Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/ia64/asm-offsets.c       Tue Mar 27 16:35:37 2007 +0100
@@ -223,10 +223,11 @@ void foo(void)
 
 #ifdef PERF_COUNTERS
        BLANK();
-       DEFINE(RECOVER_TO_PAGE_FAULT_PERFC_OFS, offsetof (struct perfcounter, 
recover_to_page_fault));
-       DEFINE(RECOVER_TO_BREAK_FAULT_PERFC_OFS, offsetof (struct perfcounter, 
recover_to_break_fault));
-       DEFINE(FAST_HYPERPRIVOP_PERFC_OFS, offsetof (struct perfcounter, 
fast_hyperprivop));
-       DEFINE(FAST_REFLECT_PERFC_OFS, offsetof (struct perfcounter, 
fast_reflect));
+       DEFINE(IA64_PERFC_recover_to_page_fault, PERFC_recover_to_page_fault);
+       DEFINE(IA64_PERFC_recover_to_break_fault, PERFC_recover_to_break_fault);
+       DEFINE(IA64_PERFC_fast_vhpt_translate, PERFC_fast_vhpt_translate);
+       DEFINE(IA64_PERFC_fast_hyperprivop, PERFC_fast_hyperprivop);
+       DEFINE(IA64_PERFC_fast_reflect, PERFC_fast_reflect);
 #endif
 
        BLANK();
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/ia64/xen/hyperprivop.S
--- a/xen/arch/ia64/xen/hyperprivop.S   Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/ia64/xen/hyperprivop.S   Tue Mar 27 16:35:37 2007 +0100
@@ -26,8 +26,7 @@
 # define FAST_HYPERPRIVOPS
 # ifdef PERF_COUNTERS
 #  define FAST_HYPERPRIVOP_CNT
-#  define FAST_HYPERPRIVOP_PERFC(N) \
-       (perfcounters + FAST_HYPERPRIVOP_PERFC_OFS + (4 * N))
+#  define FAST_HYPERPRIVOP_PERFC(N) PERFC(fast_hyperprivop + N)
 #  define FAST_REFLECT_CNT
 # endif
        
@@ -364,7 +363,7 @@ GLOBAL_ENTRY(fast_tick_reflect)
        mov rp=r29;;
        mov cr.itm=r26;;        // ensure next tick
 #ifdef FAST_REFLECT_CNT
-       movl r20=perfcounters+FAST_REFLECT_PERFC_OFS+((0x3000>>8)*4);;
+       movl r20=PERFC(fast_reflect + (0x3000>>8));;
        ld4 r21=[r20];;
        adds r21=1,r21;;
        st4 [r20]=r21;;
@@ -597,7 +596,7 @@ END(fast_break_reflect)
 //     r31 == pr
 ENTRY(fast_reflect)
 #ifdef FAST_REFLECT_CNT
-       movl r22=perfcounters+FAST_REFLECT_PERFC_OFS;
+       movl r22=PERFC(fast_reflect);
        shr r23=r20,8-2;;
        add r22=r22,r23;;
        ld4 r21=[r22];;
@@ -938,7 +937,7 @@ 1:  // check the guest VHPT
 (p7)   br.cond.spnt.few page_not_present;;
 
 #ifdef FAST_REFLECT_CNT
-       movl r21=perfcounter+FAST_VHPT_TRANSLATE_PERFC_OFS;;
+       movl r21=PERFC(fast_vhpt_translate);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
@@ -968,7 +967,7 @@ END(fast_tlb_miss_reflect)
 // we get here if fast_insert fails (e.g. due to metaphysical lookup)
 ENTRY(recover_and_page_fault)
 #ifdef PERF_COUNTERS
-       movl r21=perfcounters + RECOVER_TO_PAGE_FAULT_PERFC_OFS;;
+       movl r21=PERFC(recover_to_page_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
@@ -1832,7 +1831,7 @@ END(hyper_ptc_ga)
 // recovery block for hyper_itc metaphysical memory lookup
 ENTRY(recover_and_dispatch_break_fault)
 #ifdef PERF_COUNTERS
-       movl r21=perfcounters + RECOVER_TO_BREAK_FAULT_PERFC_OFS;;
+       movl r21=PERFC(recover_to_break_fault);;
        ld4 r22=[r21];;
        adds r22=1,r22;;
        st4 [r21]=r22;;
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/ia64/xen/privop_stat.c
--- a/xen/arch/ia64/xen/privop_stat.c   Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/ia64/xen/privop_stat.c   Tue Mar 27 16:35:37 2007 +0100
@@ -10,48 +10,39 @@ struct privop_addr_count {
        unsigned long addr[PRIVOP_COUNT_NADDRS];
        unsigned int count[PRIVOP_COUNT_NADDRS];
        unsigned int overflow;
-       atomic_t *perfc_addr;
-       atomic_t *perfc_count;
-       atomic_t *perfc_overflow;
 };
 
-#undef  PERFCOUNTER
+struct privop_addr_info {
+       enum perfcounter perfc_addr;
+       enum perfcounter perfc_count;
+       enum perfcounter perfc_overflow;
+};
+
 #define PERFCOUNTER(var, name)
-
-#undef  PERFCOUNTER_CPU
-#define PERFCOUNTER_CPU(var, name)
-
-#undef  PERFCOUNTER_ARRAY
 #define PERFCOUNTER_ARRAY(var, name, size)
 
-#undef  PERFSTATUS
 #define PERFSTATUS(var, name)
-
-#undef  PERFSTATUS_CPU
-#define PERFSTATUS_CPU(var, name)
-
-#undef  PERFSTATUS_ARRAY
 #define PERFSTATUS_ARRAY(var, name, size)
 
-#undef PERFPRIVOPADDR
 #define PERFPRIVOPADDR(name)                        \
     {                                               \
-        { 0 }, { 0 }, 0,                            \
-        perfcounters.privop_addr_##name##_addr,     \
-        perfcounters.privop_addr_##name##_count,    \
-        perfcounters.privop_addr_##name##_overflow  \
+        PERFC_privop_addr_##name##_addr,            \
+        PERFC_privop_addr_##name##_count,           \
+        PERFC_privop_addr_##name##_overflow         \
     },
 
-static struct privop_addr_count privop_addr_counter[] = {
+static const struct privop_addr_info privop_addr_info[] = {
 #include <asm/perfc_defn.h>
 };
 
 #define PRIVOP_COUNT_NINSTS \
-        (sizeof(privop_addr_counter) / sizeof(privop_addr_counter[0]))
+        (sizeof(privop_addr_info) / sizeof(privop_addr_info[0]))
+
+static DEFINE_PER_CPU(struct privop_addr_count[PRIVOP_COUNT_NINSTS], 
privop_addr_counter);
 
 void privop_count_addr(unsigned long iip, enum privop_inst inst)
 {
-       struct privop_addr_count *v = &privop_addr_counter[inst];
+       struct privop_addr_count *v = this_cpu(privop_addr_counter) + inst;
        int i;
 
        if (inst >= PRIVOP_COUNT_NINSTS)
@@ -72,31 +63,44 @@ void privop_count_addr(unsigned long iip
 
 void gather_privop_addrs(void)
 {
-       int i, j;
-       atomic_t *v;
-       for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
-               /* Note: addresses are truncated!  */
-               v = privop_addr_counter[i].perfc_addr;
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       atomic_set(&v[j], privop_addr_counter[i].addr[j]);
+       unsigned int cpu;
 
-               v = privop_addr_counter[i].perfc_count;
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       atomic_set(&v[j], privop_addr_counter[i].count[j]);
+       for_each_cpu ( cpu ) {
+               perfc_t *perfcounters = per_cpu(perfcounters, cpu);
+               struct privop_addr_count *s = per_cpu(privop_addr_counter, cpu);
+               int i, j;
+
+               for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, s++) {
+                       perfc_t *d;
+
+                       /* Note: addresses are truncated!  */
+                       d = perfcounters + privop_addr_info[i].perfc_addr;
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               d[j] = s->addr[j];
+
+                       d = perfcounters + privop_addr_info[i].perfc_count;
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               d[j] = s->count[j];
                
-               atomic_set(privop_addr_counter[i].perfc_overflow,
-                          privop_addr_counter[i].overflow);
+                       perfcounters[privop_addr_info[i].perfc_overflow] =
+                               s->overflow;
+               }
        }
 }
 
 void reset_privop_addrs(void)
 {
-       int i, j;
-       for (i = 0; i < PRIVOP_COUNT_NINSTS; i++) {
-               struct privop_addr_count *v = &privop_addr_counter[i];
-               for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
-                       v->addr[j] = v->count[j] = 0;
-               v->overflow = 0;
+       unsigned int cpu;
+
+       for_each_cpu ( cpu ) {
+               struct privop_addr_count *v = per_cpu(privop_addr_counter, cpu);
+               int i, j;
+
+               for (i = 0; i < PRIVOP_COUNT_NINSTS; i++, v++) {
+                       for (j = 0; j < PRIVOP_COUNT_NADDRS; j++)
+                               v->addr[j] = v->count[j] = 0;
+                       v->overflow = 0;
+               }
        }
 }
 #endif
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/ia64/xen/vhpt.c
--- a/xen/arch/ia64/xen/vhpt.c  Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/ia64/xen/vhpt.c  Tue Mar 27 16:35:37 2007 +0100
@@ -512,7 +512,7 @@ void gather_vhpt_stats(void)
                for (i = 0; i < VHPT_NUM_ENTRIES; i++, v++)
                        if (!(v->ti_tag & INVALID_TI_TAG))
                                vhpt_valid++;
-               perfc_seta(vhpt_valid_entries, cpu, vhpt_valid);
-       }
-}
-#endif
+               per_cpu(perfcounters, cpu)[PERFC_vhpt_valid_entries] = 
vhpt_valid;
+       }
+}
+#endif
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/x86/mm.c Tue Mar 27 16:35:37 2007 +0100
@@ -1969,6 +1969,8 @@ int do_mmuext_op(
         if ( unlikely(!guest_handle_is_null(pdone)) )
             (void)copy_from_guest(&done, pdone, 1);
     }
+    else
+        perfc_incr(calls_to_mmuext_op);
 
     if ( unlikely(!guest_handle_okay(uops, count)) )
     {
@@ -2223,6 +2225,8 @@ int do_mmuext_op(
 
     UNLOCK_BIGLOCK(d);
 
+    perfc_add(num_mmuext_ops, i);
+
  out:
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(!guest_handle_is_null(pdone)) )
@@ -2257,6 +2261,8 @@ int do_mmu_update(
         if ( unlikely(!guest_handle_is_null(pdone)) )
             (void)copy_from_guest(&done, pdone, 1);
     }
+    else
+        perfc_incr(calls_to_mmu_update);
 
     if ( unlikely(!guest_handle_okay(ureqs, count)) )
     {
@@ -2272,9 +2278,6 @@ int do_mmu_update(
 
     domain_mmap_cache_init(&mapcache);
     domain_mmap_cache_init(&sh_mapcache);
-
-    perfc_incrc(calls_to_mmu_update);
-    perfc_addc(num_page_updates, count);
 
     LOCK_BIGLOCK(d);
 
@@ -2438,6 +2441,8 @@ int do_mmu_update(
     domain_mmap_cache_destroy(&mapcache);
     domain_mmap_cache_destroy(&sh_mapcache);
 
+    perfc_add(num_page_updates, i);
+
  out:
     /* Add incremental work we have done to the @done output parameter. */
     if ( unlikely(!guest_handle_is_null(pdone)) )
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/x86/x86_32/asm-offsets.c
--- a/xen/arch/x86/x86_32/asm-offsets.c Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/x86/x86_32/asm-offsets.c Tue Mar 27 16:35:37 2007 +0100
@@ -107,20 +107,10 @@ void __dummy__(void)
     BLANK();
 
 #if PERF_COUNTERS
-    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
-    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
+    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
+    DEFINE(PERFC_exceptions, PERFC_exceptions);
     BLANK();
 #endif
-
-    OFFSET(MULTICALL_op, struct multicall_entry, op);
-    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
-    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
-    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
-    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
-    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
-    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
-    OFFSET(MULTICALL_result, struct multicall_entry, result);
-    BLANK();
 
     DEFINE(FIXMAP_apic_base, fix_to_virt(FIX_APIC_BASE));
     BLANK();
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/x86/x86_32/entry.S
--- a/xen/arch/x86/x86_32/entry.S       Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/x86/x86_32/entry.S       Tue Mar 27 16:35:37 2007 +0100
@@ -173,7 +173,7 @@ ENTRY(hypercall)
         GET_CURRENT(%ebx)
         cmpl  $NR_hypercalls,%eax
         jae   bad_hypercall
-        PERFC_INCR(PERFC_hypercalls, %eax)
+        PERFC_INCR(PERFC_hypercalls, %eax, %ebx)
 #ifndef NDEBUG
         /* Create shadow parameters and corrupt those not used by this call. */
         pushl %eax
@@ -429,7 +429,7 @@ 1:      xorl  %eax,%eax
         movl  %esp,%edx
         pushl %edx                      # push the cpu_user_regs pointer
         GET_CURRENT(%ebx)
-        PERFC_INCR(PERFC_exceptions, %eax)
+        PERFC_INCR(PERFC_exceptions, %eax, %ebx)
         call  *exception_table(,%eax,4)
         addl  $4,%esp
         movl  UREGS_eflags(%esp),%eax
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/x86/x86_64/asm-offsets.c
--- a/xen/arch/x86/x86_64/asm-offsets.c Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/x86/x86_64/asm-offsets.c Tue Mar 27 16:35:37 2007 +0100
@@ -121,30 +121,8 @@ void __dummy__(void)
     BLANK();
 
 #if PERF_COUNTERS
-    OFFSET(PERFC_hypercalls, struct perfcounter, hypercalls);
-    OFFSET(PERFC_exceptions, struct perfcounter, exceptions);
-    BLANK();
-#endif
-
-    OFFSET(MULTICALL_op, struct multicall_entry, op);
-    OFFSET(MULTICALL_arg0, struct multicall_entry, args[0]);
-    OFFSET(MULTICALL_arg1, struct multicall_entry, args[1]);
-    OFFSET(MULTICALL_arg2, struct multicall_entry, args[2]);
-    OFFSET(MULTICALL_arg3, struct multicall_entry, args[3]);
-    OFFSET(MULTICALL_arg4, struct multicall_entry, args[4]);
-    OFFSET(MULTICALL_arg5, struct multicall_entry, args[5]);
-    OFFSET(MULTICALL_result, struct multicall_entry, result);
-    BLANK();
-
-#ifdef CONFIG_COMPAT
-    OFFSET(COMPAT_MULTICALL_op, struct compat_multicall_entry, op);
-    OFFSET(COMPAT_MULTICALL_arg0, struct compat_multicall_entry, args[0]);
-    OFFSET(COMPAT_MULTICALL_arg1, struct compat_multicall_entry, args[1]);
-    OFFSET(COMPAT_MULTICALL_arg2, struct compat_multicall_entry, args[2]);
-    OFFSET(COMPAT_MULTICALL_arg3, struct compat_multicall_entry, args[3]);
-    OFFSET(COMPAT_MULTICALL_arg4, struct compat_multicall_entry, args[4]);
-    OFFSET(COMPAT_MULTICALL_arg5, struct compat_multicall_entry, args[5]);
-    OFFSET(COMPAT_MULTICALL_result, struct compat_multicall_entry, result);
+    DEFINE(PERFC_hypercalls, PERFC_hypercalls);
+    DEFINE(PERFC_exceptions, PERFC_exceptions);
     BLANK();
 #endif
 
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/x86/x86_64/compat/entry.S
--- a/xen/arch/x86/x86_64/compat/entry.S        Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/x86/x86_64/compat/entry.S        Tue Mar 27 16:35:37 2007 +0100
@@ -57,7 +57,7 @@ ENTRY(compat_hypercall)
         movl  UREGS_rbx(%rsp),%edi   /* Arg 1        */
 #endif
         leaq  compat_hypercall_table(%rip),%r10
-        PERFC_INCR(PERFC_hypercalls, %rax)
+        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
         /* Deliberately corrupt parameter regs used by this hypercall. */
diff -r 3375391fb0c9 -r 96f167771979 xen/arch/x86/x86_64/entry.S
--- a/xen/arch/x86/x86_64/entry.S       Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/arch/x86/x86_64/entry.S       Tue Mar 27 16:35:37 2007 +0100
@@ -147,7 +147,7 @@ ENTRY(syscall_enter)
         pushq UREGS_rip+8(%rsp)
 #endif
         leaq  hypercall_table(%rip),%r10
-        PERFC_INCR(PERFC_hypercalls, %rax)
+        PERFC_INCR(PERFC_hypercalls, %rax, %rbx)
         callq *(%r10,%rax,8)
 #ifndef NDEBUG
         /* Deliberately corrupt parameter regs used by this hypercall. */
@@ -396,7 +396,7 @@ 1:      movq  %rsp,%rdi
         movl  UREGS_entry_vector(%rsp),%eax
         leaq  exception_table(%rip),%rdx
         GET_CURRENT(%rbx)
-        PERFC_INCR(PERFC_exceptions, %rax)
+        PERFC_INCR(PERFC_exceptions, %rax, %rbx)
         callq *(%rdx,%rax,8)
         testb $3,UREGS_cs(%rsp)
         jz    restore_all_xen
diff -r 3375391fb0c9 -r 96f167771979 xen/common/multicall.c
--- a/xen/common/multicall.c    Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/common/multicall.c    Tue Mar 27 16:35:37 2007 +0100
@@ -10,6 +10,7 @@
 #include <xen/event.h>
 #include <xen/multicall.h>
 #include <xen/guest_access.h>
+#include <xen/perfc.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 
@@ -69,14 +70,18 @@ do_multicall(
         guest_handle_add_offset(call_list, 1);
     }
 
+    perfc_incr(calls_to_multicall);
+    perfc_add(calls_from_multicall, nr_calls);
     mcs->flags = 0;
     return 0;
 
  fault:
+    perfc_incr(calls_to_multicall);
     mcs->flags = 0;
     return -EFAULT;
 
  preempted:
+    perfc_add(calls_from_multicall, i);
     mcs->flags = 0;
     return hypercall_create_continuation(
         __HYPERVISOR_multicall, "hi", call_list, nr_calls-i);
diff -r 3375391fb0c9 -r 96f167771979 xen/common/perfc.c
--- a/xen/common/perfc.c        Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/common/perfc.c        Tue Mar 27 16:35:37 2007 +0100
@@ -10,81 +10,98 @@
 #include <public/sysctl.h>
 #include <asm/perfc.h>
 
-#undef  PERFCOUNTER
-#undef  PERFCOUNTER_CPU
-#undef  PERFCOUNTER_ARRAY
-#undef  PERFSTATUS
-#undef  PERFSTATUS_CPU
-#undef  PERFSTATUS_ARRAY
 #define PERFCOUNTER( var, name )              { name, TYPE_SINGLE, 0 },
-#define PERFCOUNTER_CPU( var, name )          { name, TYPE_CPU,    0 },
 #define PERFCOUNTER_ARRAY( var, name, size )  { name, TYPE_ARRAY,  size },
 #define PERFSTATUS( var, name )               { name, TYPE_S_SINGLE, 0 },
-#define PERFSTATUS_CPU( var, name )           { name, TYPE_S_CPU,    0 },
 #define PERFSTATUS_ARRAY( var, name, size )   { name, TYPE_S_ARRAY,  size },
-static struct {
-    char *name;
-    enum { TYPE_SINGLE, TYPE_CPU, TYPE_ARRAY,
-           TYPE_S_SINGLE, TYPE_S_CPU, TYPE_S_ARRAY
+static const struct {
+    const char *name;
+    enum { TYPE_SINGLE, TYPE_ARRAY,
+           TYPE_S_SINGLE, TYPE_S_ARRAY
     } type;
-    int nr_elements;
+    unsigned int nr_elements;
 } perfc_info[] = {
 #include <xen/perfc_defn.h>
 };
 
 #define NR_PERFCTRS (sizeof(perfc_info) / sizeof(perfc_info[0]))
 
-struct perfcounter perfcounters;
+DEFINE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
 
 void perfc_printall(unsigned char key)
 {
-    unsigned int i, j, sum;
+    unsigned int i, j;
     s_time_t now = NOW();
-    atomic_t *counters = (atomic_t *)&perfcounters;
 
     printk("Xen performance counters SHOW  (now = 0x%08X:%08X)\n",
            (u32)(now>>32), (u32)now);
 
-    for ( i = 0; i < NR_PERFCTRS; i++ ) 
-    {
+    for ( i = j = 0; i < NR_PERFCTRS; i++ )
+    {
+        unsigned int k, cpu;
+        unsigned long long sum = 0;
+
         printk("%-32s  ",  perfc_info[i].name);
         switch ( perfc_info[i].type )
         {
         case TYPE_SINGLE:
         case TYPE_S_SINGLE:
-            printk("TOTAL[%10d]", atomic_read(&counters[0]));
-            counters += 1;
-            break;
-        case TYPE_CPU:
-        case TYPE_S_CPU:
-            sum = 0;
-            for_each_online_cpu ( j )
-                sum += atomic_read(&counters[j]);
-            printk("TOTAL[%10u]", sum);
+            for_each_online_cpu ( cpu )
+                sum += per_cpu(perfcounters, cpu)[j];
+            printk("TOTAL[%12Lu]", sum);
+            if ( sum )
+            {
+                k = 0;
+                for_each_online_cpu ( cpu )
+                {
+                    if ( k > 0 && (k % 4) == 0 )
+                        printk("\n%46s", "");
+                    printk("  CPU%02u[%10"PRIperfc"u]", cpu, 
per_cpu(perfcounters, cpu)[j]);
+                    ++k;
+                }
+            }
+            ++j;
+            break;
+        case TYPE_ARRAY:
+        case TYPE_S_ARRAY:
+            for_each_online_cpu ( cpu )
+            {
+                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+
+                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
+                    sum += counters[k];
+            }
+            printk("TOTAL[%12Lu]", sum);
             if (sum)
             {
-                for_each_online_cpu ( j )
-                    printk("  CPU%02d[%10d]", j, atomic_read(&counters[j]));
-            }
-            counters += NR_CPUS;
-            break;
-        case TYPE_ARRAY:
-        case TYPE_S_ARRAY:
-            for ( j = sum = 0; j < perfc_info[i].nr_elements; j++ )
-                sum += atomic_read(&counters[j]);
-            printk("TOTAL[%10u]", sum);
 #ifdef PERF_ARRAYS
-            if (sum)
-            {
-                for ( j = 0; j < perfc_info[i].nr_elements; j++ )
-                {
-                    if ( (j % 4) == 0 )
-                        printk("\n                 ");
-                    printk("  ARR%02d[%10d]", j, atomic_read(&counters[j]));
-                }
-            }
+                for ( k = 0; k < perfc_info[i].nr_elements; k++ )
+                {
+                    sum = 0;
+                    for_each_online_cpu ( cpu )
+                        sum += per_cpu(perfcounters, cpu)[j + k];
+                    if ( (k % 4) == 0 )
+                        printk("\n%16s", "");
+                    printk("  ARR%02u[%10Lu]", k, sum);
+                }
+#else
+                k = 0;
+                for_each_online_cpu ( cpu )
+                {
+                    perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+                    unsigned int n;
+
+                    sum = 0;
+                    for ( n = 0; n < perfc_info[i].nr_elements; n++ )
+                        sum += counters[n];
+                    if ( k > 0 && (k % 4) == 0 )
+                        printk("\n%46s", "");
+                    printk("  CPU%02u[%10Lu]", cpu, sum);
+                    ++k;
+                }
 #endif
-            counters += j;
+            }
+            j += perfc_info[i].nr_elements;
             break;
         }
         printk("\n");
@@ -97,7 +114,6 @@ void perfc_reset(unsigned char key)
 {
     unsigned int i, j;
     s_time_t now = NOW();
-    atomic_t *counters = (atomic_t *)&perfcounters;
 
     if ( key != '\0' )
         printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
@@ -105,43 +121,39 @@ void perfc_reset(unsigned char key)
 
     /* leave STATUS counters alone -- don't reset */
 
-    for ( i = 0; i < NR_PERFCTRS; i++ ) 
-    {
-        switch ( perfc_info[i].type )
-        {
-        case TYPE_SINGLE:
-            atomic_set(&counters[0],0);
-        case TYPE_S_SINGLE:
-            counters += 1;
-            break;
-        case TYPE_CPU:
-            for ( j = 0; j < NR_CPUS; j++ )
-                atomic_set(&counters[j],0);
-        case TYPE_S_CPU:
-            counters += NR_CPUS;
-            break;
-        case TYPE_ARRAY:
-            for ( j = 0; j < perfc_info[i].nr_elements; j++ )
-                atomic_set(&counters[j],0);
-        case TYPE_S_ARRAY:
-            counters += perfc_info[i].nr_elements;
-            break;
-        }
-    }
-
-    arch_perfc_reset ();
+    for ( i = j = 0; i < NR_PERFCTRS; i++ )
+    {
+        unsigned int cpu;
+
+        switch ( perfc_info[i].type )
+        {
+        case TYPE_SINGLE:
+            for_each_cpu ( cpu )
+                per_cpu(perfcounters, cpu)[j] = 0;
+        case TYPE_S_SINGLE:
+            ++j;
+            break;
+        case TYPE_ARRAY:
+            for_each_cpu ( cpu )
+                memset(per_cpu(perfcounters, cpu) + j, 0,
+                       perfc_info[i].nr_elements * sizeof(perfc_t));
+        case TYPE_S_ARRAY:
+            j += perfc_info[i].nr_elements;
+            break;
+        }
+    }
+
+    arch_perfc_reset();
 }
 
 static xen_sysctl_perfc_desc_t perfc_d[NR_PERFCTRS];
 static xen_sysctl_perfc_val_t *perfc_vals;
-static int               perfc_nbr_vals;
+static unsigned int      perfc_nbr_vals;
 static int               perfc_init = 0;
 static int perfc_copy_info(XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc,
                            XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val)
 {
-    unsigned int i, j;
-    unsigned int v = 0;
-    atomic_t *counters = (atomic_t *)&perfcounters;
+    unsigned int i, j, v;
 
     /* We only copy the name and array-size information once. */
     if ( !perfc_init ) 
@@ -154,11 +166,7 @@ static int perfc_copy_info(XEN_GUEST_HAN
             {
             case TYPE_SINGLE:
             case TYPE_S_SINGLE:
-                perfc_d[i].nr_vals = 1;
-                break;
-            case TYPE_CPU:
-            case TYPE_S_CPU:
-                perfc_d[i].nr_vals = num_online_cpus();
+                perfc_d[i].nr_vals = num_possible_cpus();
                 break;
             case TYPE_ARRAY:
             case TYPE_S_ARRAY:
@@ -181,26 +189,31 @@ static int perfc_copy_info(XEN_GUEST_HAN
     arch_perfc_gather();
 
     /* We gather the counts together every time. */
-    for ( i = 0; i < NR_PERFCTRS; i++ )
-    {
-        switch ( perfc_info[i].type )
-        {
-        case TYPE_SINGLE:
-        case TYPE_S_SINGLE:
-            perfc_vals[v++] = atomic_read(&counters[0]);
-            counters += 1;
-            break;
-        case TYPE_CPU:
-        case TYPE_S_CPU:
-            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
-                perfc_vals[v++] = atomic_read(&counters[j]);
-            counters += NR_CPUS;
-            break;
-        case TYPE_ARRAY:
-        case TYPE_S_ARRAY:
-            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
-                perfc_vals[v++] = atomic_read(&counters[j]);
-            counters += perfc_info[i].nr_elements;
+    for ( i = j = v = 0; i < NR_PERFCTRS; i++ )
+    {
+        unsigned int cpu;
+
+        switch ( perfc_info[i].type )
+        {
+        case TYPE_SINGLE:
+        case TYPE_S_SINGLE:
+            for_each_cpu ( cpu )
+                perfc_vals[v++] = per_cpu(perfcounters, cpu)[j];
+            ++j;
+            break;
+        case TYPE_ARRAY:
+        case TYPE_S_ARRAY:
+            memset(perfc_vals + v, 0, perfc_d[i].nr_vals * 
sizeof(*perfc_vals));
+            for_each_cpu ( cpu )
+            {
+                perfc_t *counters = per_cpu(perfcounters, cpu) + j;
+                unsigned int k;
+
+                for ( k = 0; k < perfc_d[i].nr_vals; k++ )
+                    perfc_vals[v + k] += counters[k];
+            }
+            v += perfc_d[i].nr_vals;
+            j += perfc_info[i].nr_elements;
             break;
         }
     }
@@ -224,14 +237,12 @@ int perfc_control(xen_sysctl_perfc_op_t 
     switch ( pc->cmd )
     {
     case XEN_SYSCTL_PERFCOP_reset:
-        perfc_copy_info(pc->desc, pc->val);
+        rc = perfc_copy_info(pc->desc, pc->val);
         perfc_reset(0);
-        rc = 0;
         break;
 
     case XEN_SYSCTL_PERFCOP_query:
-        perfc_copy_info(pc->desc, pc->val);
-        rc = 0;
+        rc = perfc_copy_info(pc->desc, pc->val);
         break;
 
     default:
diff -r 3375391fb0c9 -r 96f167771979 
xen/include/asm-ia64/linux-xen/asm/asmmacro.h
--- a/xen/include/asm-ia64/linux-xen/asm/asmmacro.h     Tue Mar 27 16:23:52 
2007 +0100
+++ b/xen/include/asm-ia64/linux-xen/asm/asmmacro.h     Tue Mar 27 16:35:37 
2007 +0100
@@ -116,4 +116,8 @@ 2:{ .mib;                                           \
 # define dv_serialize_instruction
 #endif
 
+#ifdef PERF_COUNTERS
+#define PERFC(n) (THIS_CPU(perfcounters) + (IA64_PERFC_ ## n) * 4)
+#endif
+
 #endif /* _ASM_IA64_ASMMACRO_H */
diff -r 3375391fb0c9 -r 96f167771979 xen/include/asm-ia64/perfc_defn.h
--- a/xen/include/asm-ia64/perfc_defn.h Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/asm-ia64/perfc_defn.h Tue Mar 27 16:35:37 2007 +0100
@@ -84,7 +84,7 @@ PERFCOUNTER_ARRAY(fast_reflect,       "f
 PERFCOUNTER_ARRAY(fast_reflect,       "fast reflection", 0x80)
 
 PERFSTATUS(vhpt_nbr_entries,          "nbr of entries per VHPT")
-PERFSTATUS_CPU(vhpt_valid_entries,    "nbr of valid entries in VHPT")
+PERFSTATUS(vhpt_valid_entries,        "nbr of valid entries in VHPT")
 
 PERFCOUNTER_ARRAY(vmx_mmio_access,    "vmx_mmio_access", 8)
 PERFCOUNTER_CPU(vmx_pal_emul,         "vmx_pal_emul")
@@ -106,6 +106,8 @@ PERFSTATUS(privop_addr_##name##_overflow
 
 PERFPRIVOPADDR(get_ifa)
 PERFPRIVOPADDR(thash)
+
+#undef PERFPRIVOPADDR
 #endif
 
 // vhpt.c
diff -r 3375391fb0c9 -r 96f167771979 xen/include/asm-ia64/privop_stat.h
--- a/xen/include/asm-ia64/privop_stat.h        Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/asm-ia64/privop_stat.h        Tue Mar 27 16:35:37 2007 +0100
@@ -1,5 +1,5 @@
-#ifndef _XEN_UA64_PRIVOP_STAT_H
-#define _XEN_UA64_PRIVOP_STAT_H
+#ifndef _XEN_IA64_PRIVOP_STAT_H
+#define _XEN_IA64_PRIVOP_STAT_H
 #include <asm/config.h>
 #include <xen/types.h>
 #include <public/xen.h>
@@ -9,30 +9,23 @@ extern void gather_privop_addrs(void);
 extern void gather_privop_addrs(void);
 extern void reset_privop_addrs(void);
 
-#undef  PERFCOUNTER
 #define PERFCOUNTER(var, name)
-
-#undef  PERFCOUNTER_CPU
-#define PERFCOUNTER_CPU(var, name)
-
-#undef  PERFCOUNTER_ARRAY
 #define PERFCOUNTER_ARRAY(var, name, size)
 
-#undef  PERFSTATUS
 #define PERFSTATUS(var, name)
-
-#undef  PERFSTATUS_CPU
-#define PERFSTATUS_CPU(var, name)
-
-#undef  PERFSTATUS_ARRAY
 #define PERFSTATUS_ARRAY(var, name, size)
 
-#undef  PERFPRIVOPADDR
 #define PERFPRIVOPADDR(name) privop_inst_##name,
 
 enum privop_inst {
 #include <asm/perfc_defn.h>
 };
+
+#undef PERFCOUNTER
+#undef PERFCOUNTER_ARRAY
+
+#undef PERFSTATUS
+#undef PERFSTATUS_ARRAY
 
 #undef PERFPRIVOPADDR
 
@@ -45,4 +38,4 @@ extern void privop_count_addr(unsigned l
 #define reset_privop_addrs() do {} while (0)
 #endif
 
-#endif /* _XEN_UA64_PRIVOP_STAT_H */
+#endif /* _XEN_IA64_PRIVOP_STAT_H */
diff -r 3375391fb0c9 -r 96f167771979 xen/include/asm-x86/multicall.h
--- a/xen/include/asm-x86/multicall.h   Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/asm-x86/multicall.h   Tue Mar 27 16:35:37 2007 +0100
@@ -6,84 +6,94 @@
 #define __ASM_X86_MULTICALL_H__
 
 #include <xen/errno.h>
-#include <asm/asm_defns.h>
 
 #ifdef __x86_64__
 
 #define do_multicall_call(_call)                             \
     do {                                                     \
         __asm__ __volatile__ (                               \
-            "    movq  "STR(MULTICALL_op)"(%0),%%rax; "      \
+            "    movq  %c1(%0),%%rax; "                      \
+            "    leaq  hypercall_table(%%rip),%%rdi; "       \
             "    cmpq  $("STR(NR_hypercalls)"),%%rax; "      \
             "    jae   2f; "                                 \
-            "    leaq  hypercall_table(%%rip),%%rdi; "       \
-            "    leaq  (%%rdi,%%rax,8),%%rax; "              \
-            "    movq  "STR(MULTICALL_arg0)"(%0),%%rdi; "    \
-            "    movq  "STR(MULTICALL_arg1)"(%0),%%rsi; "    \
-            "    movq  "STR(MULTICALL_arg2)"(%0),%%rdx; "    \
-            "    movq  "STR(MULTICALL_arg3)"(%0),%%rcx; "    \
-            "    movq  "STR(MULTICALL_arg4)"(%0),%%r8; "     \
-            "    callq *(%%rax); "                           \
-            "1:  movq  %%rax,"STR(MULTICALL_result)"(%0)\n"  \
+            "    movq  (%%rdi,%%rax,8),%%rax; "              \
+            "    movq  %c2+0*%c3(%0),%%rdi; "                \
+            "    movq  %c2+1*%c3(%0),%%rsi; "                \
+            "    movq  %c2+2*%c3(%0),%%rdx; "                \
+            "    movq  %c2+3*%c3(%0),%%rcx; "                \
+            "    movq  %c2+4*%c3(%0),%%r8; "                 \
+            "    callq *%%rax; "                             \
+            "1:  movq  %%rax,%c4(%0)\n"                      \
             ".section .fixup,\"ax\"\n"                       \
             "2:  movq  $-"STR(ENOSYS)",%%rax\n"              \
             "    jmp   1b\n"                                 \
             ".previous\n"                                    \
-            : : "b" (_call)                                  \
+            :                                                \
+            : "b" (_call),                                   \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
               /* all the caller-saves registers */           \
             : "rax", "rcx", "rdx", "rsi", "rdi",             \
               "r8",  "r9",  "r10", "r11" );                  \
     } while ( 0 )
 
-#define compat_multicall_call(_call)                              \
-    do {                                                          \
-        __asm__ __volatile__ (                                    \
-            "    movl  "STR(COMPAT_MULTICALL_op)"(%0),%%eax; "    \
-            "    leaq  compat_hypercall_table(%%rip),%%rdi; "     \
-            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "           \
-            "    jae   2f; "                                      \
-            "    movq  (%%rdi,%%rax,8),%%rax; "                   \
-            "    movl  "STR(COMPAT_MULTICALL_arg0)"(%0),%%edi; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg1)"(%0),%%esi; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg2)"(%0),%%edx; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg3)"(%0),%%ecx; "  \
-            "    movl  "STR(COMPAT_MULTICALL_arg4)"(%0),%%r8d; "  \
-            "    callq *%%rax; "                                  \
-            "1:  movl  %%eax,"STR(COMPAT_MULTICALL_result)"(%0)\n"\
-            ".section .fixup,\"ax\"\n"                            \
-            "2:  movl  $-"STR(ENOSYS)",%%eax\n"                   \
-            "    jmp   1b\n"                                      \
-            ".previous\n"                                         \
-            : : "b" (_call)                                       \
-              /* all the caller-saves registers */                \
-            : "rax", "rcx", "rdx", "rsi", "rdi",                  \
-              "r8",  "r9",  "r10", "r11" );                       \
-    } while ( 0 )
+#define compat_multicall_call(_call)                         \
+        __asm__ __volatile__ (                               \
+            "    movl  %c1(%0),%%eax; "                      \
+            "    leaq  compat_hypercall_table(%%rip),%%rdi; "\
+            "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
+            "    jae   2f; "                                 \
+            "    movq  (%%rdi,%%rax,8),%%rax; "              \
+            "    movl  %c2+0*%c3(%0),%%edi; "                \
+            "    movl  %c2+1*%c3(%0),%%esi; "                \
+            "    movl  %c2+2*%c3(%0),%%edx; "                \
+            "    movl  %c2+3*%c3(%0),%%ecx; "                \
+            "    movl  %c2+4*%c3(%0),%%r8d; "                \
+            "    callq *%%rax; "                             \
+            "1:  movl  %%eax,%c4(%0)\n"                      \
+            ".section .fixup,\"ax\"\n"                       \
+            "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
+            "    jmp   1b\n"                                 \
+            ".previous\n"                                    \
+            :                                                \
+            : "b" (_call),                                   \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
+              /* all the caller-saves registers */           \
+            : "rax", "rcx", "rdx", "rsi", "rdi",             \
+              "r8",  "r9",  "r10", "r11" )                   \
 
 #else
 
 #define do_multicall_call(_call)                             \
-    do {                                                     \
         __asm__ __volatile__ (                               \
-            "    pushl "STR(MULTICALL_arg4)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg3)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg2)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg1)"(%0); "          \
-            "    pushl "STR(MULTICALL_arg0)"(%0); "          \
-            "    movl  "STR(MULTICALL_op)"(%0),%%eax; "      \
+            "    movl  %c1(%0),%%eax; "                      \
+            "    pushl %c2+4*%c3(%0); "                      \
+            "    pushl %c2+3*%c3(%0); "                      \
+            "    pushl %c2+2*%c3(%0); "                      \
+            "    pushl %c2+1*%c3(%0); "                      \
+            "    pushl %c2+0*%c3(%0); "                      \
             "    cmpl  $("STR(NR_hypercalls)"),%%eax; "      \
             "    jae   2f; "                                 \
             "    call  *hypercall_table(,%%eax,4); "         \
-            "1:  movl  %%eax,"STR(MULTICALL_result)"(%0); "  \
+            "1:  movl  %%eax,%c4(%0); "                      \
             "    addl  $20,%%esp\n"                          \
             ".section .fixup,\"ax\"\n"                       \
             "2:  movl  $-"STR(ENOSYS)",%%eax\n"              \
             "    jmp   1b\n"                                 \
             ".previous\n"                                    \
-            : : "b" (_call)                                  \
+            :                                                \
+            : "bSD" (_call),                                 \
+              "i" (offsetof(__typeof__(*_call), op)),        \
+              "i" (offsetof(__typeof__(*_call), args)),      \
+              "i" (sizeof(*(_call)->args)),                  \
+              "i" (offsetof(__typeof__(*_call), result))     \
               /* all the caller-saves registers */           \
-            : "eax", "ecx", "edx" );                         \
-    } while ( 0 )
+            : "eax", "ecx", "edx" )                          \
 
 #endif
 
diff -r 3375391fb0c9 -r 96f167771979 xen/include/asm-x86/perfc_defn.h
--- a/xen/include/asm-x86/perfc_defn.h  Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/asm-x86/perfc_defn.h  Tue Mar 27 16:35:37 2007 +0100
@@ -18,9 +18,11 @@ PERFCOUNTER_CPU(apic_timer,             
 
 PERFCOUNTER_CPU(domain_page_tlb_flush,  "domain page tlb flushes")
 
-PERFCOUNTER_CPU(calls_to_mmu_update,    "calls_to_mmu_update")
-PERFCOUNTER_CPU(num_page_updates,       "num_page_updates")
-PERFCOUNTER_CPU(calls_to_update_va,     "calls_to_update_va_map")
+PERFCOUNTER(calls_to_mmuext_op,         "calls to mmuext_op")
+PERFCOUNTER(num_mmuext_ops,             "mmuext ops")
+PERFCOUNTER(calls_to_mmu_update,        "calls to mmu_update")
+PERFCOUNTER(num_page_updates,           "page updates")
+PERFCOUNTER(calls_to_update_va,         "calls to update_va_map")
 PERFCOUNTER_CPU(page_faults,            "page faults")
 PERFCOUNTER_CPU(copy_user_faults,       "copy_user faults")
 
diff -r 3375391fb0c9 -r 96f167771979 xen/include/asm-x86/x86_32/asm_defns.h
--- a/xen/include/asm-x86/x86_32/asm_defns.h    Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/asm-x86/x86_32/asm_defns.h    Tue Mar 27 16:35:37 2007 +0100
@@ -1,5 +1,7 @@
 #ifndef __X86_32_ASM_DEFNS_H__
 #define __X86_32_ASM_DEFNS_H__
+
+#include <asm/percpu.h>
 
 #ifndef NDEBUG
 /* Indicate special exception stack frame by inverting the frame pointer. */
@@ -47,10 +49,14 @@
         1:
 
 #ifdef PERF_COUNTERS
-#define PERFC_INCR(_name,_idx)                          \
-        lock incl perfcounters+_name(,_idx,4)
+#define PERFC_INCR(_name,_idx,_cur)                     \
+        pushl _cur;                                     \
+        movl VCPU_processor(_cur),_cur;                 \
+        shll $PERCPU_SHIFT,_cur;                        \
+        incl per_cpu__perfcounters+_name*4(_cur,_idx,4);\
+        popl _cur
 #else
-#define PERFC_INCR(_name,_idx)
+#define PERFC_INCR(_name,_idx,_cur)
 #endif
 
 #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
diff -r 3375391fb0c9 -r 96f167771979 xen/include/asm-x86/x86_64/asm_defns.h
--- a/xen/include/asm-x86/x86_64/asm_defns.h    Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/asm-x86/x86_64/asm_defns.h    Tue Mar 27 16:35:37 2007 +0100
@@ -1,5 +1,7 @@
 #ifndef __X86_64_ASM_DEFNS_H__
 #define __X86_64_ASM_DEFNS_H__
+
+#include <asm/percpu.h>
 
 #ifndef NDEBUG
 /* Indicate special exception stack frame by inverting the frame pointer. */
@@ -47,13 +49,18 @@
         popq  %rdi;
 
 #ifdef PERF_COUNTERS
-#define PERFC_INCR(_name,_idx)                  \
-    pushq %rdx;                                 \
-    leaq perfcounters+_name(%rip),%rdx;         \
-    lock incl (%rdx,_idx,4);                    \
-    popq %rdx;
+#define PERFC_INCR(_name,_idx,_cur)             \
+        pushq _cur;                             \
+        movslq VCPU_processor(_cur),_cur;       \
+        pushq %rdx;                             \
+        leaq per_cpu__perfcounters(%rip),%rdx;  \
+        shlq $PERCPU_SHIFT,_cur;                \
+        addq %rdx,_cur;                         \
+        popq %rdx;                              \
+        incl _name*4(_cur,_idx,4);              \
+        popq _cur
 #else
-#define PERFC_INCR(_name,_idx)
+#define PERFC_INCR(_name,_idx,_cur)
 #endif
 
 /* Work around AMD erratum #88 */
diff -r 3375391fb0c9 -r 96f167771979 xen/include/xen/perfc.h
--- a/xen/include/xen/perfc.h   Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/xen/perfc.h   Tue Mar 27 16:35:37 2007 +0100
@@ -6,102 +6,94 @@
 
 #include <xen/lib.h>
 #include <xen/smp.h>
-#include <asm/atomic.h>
+#include <xen/percpu.h>
 
 /* 
  * NOTE: new counters must be defined in perfc_defn.h
  * 
  * PERFCOUNTER (counter, string)              define a new performance counter
- * PERFCOUNTER_CPU (counter, string, size)    define a counter per CPU
- * PERFCOUNTER_ARRY (counter, string, size)   define an array of counters
+ * PERFCOUNTER_ARRAY (counter, string, size)  define an array of counters
  * 
  * unlike "COUNTERS", "STATUS" variables DO NOT RESET
  * PERFSTATUS (counter, string)               define a new performance stauts
- * PERFSTATUS_CPU (counter, string, size)     define a status var per CPU
- * PERFSTATUS_ARRY (counter, string, size)    define an array of status vars
+ * PERFSTATUS_ARRAY (counter, string, size)   define an array of status vars
  * 
  * unsigned long perfc_value  (counter)        get value of a counter  
- * unsigned long perfc_valuec (counter)        get value of a per CPU counter
  * unsigned long perfc_valuea (counter, index) get value of an array counter
  * unsigned long perfc_set  (counter, val)     set value of a counter  
- * unsigned long perfc_setc (counter, val)     set value of a per CPU counter
  * unsigned long perfc_seta (counter, index, val) set value of an array counter
  * void perfc_incr  (counter)                  increment a counter          
- * void perfc_incrc (counter, index)           increment a per CPU counter   
+ * void perfc_decr  (counter)                  decrement a status
  * void perfc_incra (counter, index)           increment an array counter   
  * void perfc_add   (counter, value)           add a value to a counter     
- * void perfc_addc  (counter, value)           add a value to a per CPU counter
  * void perfc_adda  (counter, index, value)    add a value to array counter 
  * void perfc_print (counter)                  print out the counter
  */
 
-#define PERFCOUNTER( var, name ) \
-  atomic_t var[1];
-#define PERFCOUNTER_CPU( var, name ) \
-  atomic_t var[NR_CPUS];
-#define PERFCOUNTER_ARRAY( var, name, size ) \
-  atomic_t var[size];
-#define PERFSTATUS( var, name ) \
-  atomic_t var[1];
-#define PERFSTATUS_CPU( var, name ) \
-  atomic_t var[NR_CPUS];
-#define PERFSTATUS_ARRAY( var, name, size ) \
-  atomic_t var[size];
+#define PERFCOUNTER( name, descr ) \
+  PERFC_ ## name,
+#define PERFCOUNTER_ARRAY( name, descr, size ) \
+  PERFC_ ## name,                              \
+  PERFC_LAST_ ## name = PERFC_ ## name + (size) - sizeof(char[2 * !!(size) - 
1]),
 
-struct perfcounter {
+#define PERFSTATUS       PERFCOUNTER
+#define PERFSTATUS_ARRAY PERFCOUNTER_ARRAY
+
+/* Compatibility: This should go away once all users got converted. */
+#define PERFCOUNTER_CPU PERFCOUNTER
+
+enum perfcounter {
 #include <xen/perfc_defn.h>
+       NUM_PERFCOUNTERS
 };
 
-extern struct perfcounter perfcounters;
+#undef PERFCOUNTER
+#undef PERFCOUNTER_ARRAY
+#undef PERFSTATUS
+#undef PERFSTATUS_ARRAY
 
-#define perfc_value(x)    atomic_read(&perfcounters.x[0])
-#define perfc_valuec(x)   atomic_read(&perfcounters.x[smp_processor_id()])
+typedef unsigned perfc_t;
+#define PRIperfc ""
+
+DECLARE_PER_CPU(perfc_t[NUM_PERFCOUNTERS], perfcounters);
+
+#define perfc_value(x)    this_cpu(perfcounters)[PERFC_ ## x]
 #define perfc_valuea(x,y)                                               \
-    ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ?       \
-       atomic_read(&perfcounters.x[y]) : 0 )
-#define perfc_set(x,v)    atomic_set(&perfcounters.x[0], v)
-#define perfc_setc(x,v)   atomic_set(&perfcounters.x[smp_processor_id()], v)
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
+#define perfc_set(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] = (v))
 #define perfc_seta(x,y,v)                                               \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_set(&perfcounters.x[y], v);                          \
-    } while ( 0 )
-#define perfc_incr(x)     atomic_inc(&perfcounters.x[0])
-#define perfc_decr(x)     atomic_dec(&perfcounters.x[0])
-#define perfc_incrc(x)    atomic_inc(&perfcounters.x[smp_processor_id()])
-#define perfc_decrc(x)    atomic_dec(&perfcounters.x[smp_processor_id()])
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] = (v) : (v) )
+#define perfc_incr(x)     (++this_cpu(perfcounters)[PERFC_ ## x])
+#define perfc_decr(x)     (--this_cpu(perfcounters)[PERFC_ ## x])
 #define perfc_incra(x,y)                                                \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_inc(&perfcounters.x[y]);                             \
-    } while ( 0 )
-#define perfc_add(x,y)    atomic_add((y), &perfcounters.x[0])
-#define perfc_addc(x,y)   atomic_add((y), &perfcounters.x[smp_processor_id()])
-#define perfc_adda(x,y,z)                                               \
-    do {                                                                \
-        if ( (y) < (sizeof(perfcounters.x) / sizeof(*perfcounters.x)) ) \
-            atomic_add((z), &perfcounters.x[y]);                        \
-    } while ( 0 )
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        ++this_cpu(perfcounters)[PERFC_ ## x + (y)] : 0 )
+#define perfc_add(x,v)    (this_cpu(perfcounters)[PERFC_ ## x] += (v))
+#define perfc_adda(x,y,v)                                               \
+    ( (y) <= PERFC_LAST_ ## x - PERFC_ ## x ?                           \
+        this_cpu(perfcounters)[PERFC_ ## x + (y)] = (v) : (v) )
 
 /*
  * Histogram: special treatment for 0 and 1 count. After that equally spaced 
  * with last bucket taking the rest.
  */
 #ifdef PERF_ARRAYS
-#define perfc_incr_histo(_x,_v,_n)                                          \
-    do {                                                                    \
-        if ( (_v) == 0 )                                                    \
-            perfc_incra(_x, 0);                                             \
-        else if ( (_v) == 1 )                                               \
-            perfc_incra(_x, 1);                                             \
-        else if ( (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) <               \
-                  (PERFC_MAX_ ## _n - 3) )                                  \
-            perfc_incra(_x, (((_v)-2) / PERFC_ ## _n ## _BUCKET_SIZE) + 2); \
-        else                                                                \
-            perfc_incra(_x, PERFC_MAX_ ## _n - 1);                          \
+#define perfc_incr_histo(x,v)                                           \
+    do {                                                                \
+        if ( (v) == 0 )                                                 \
+            perfc_incra(x, 0);                                          \
+        else if ( (v) == 1 )                                            \
+            perfc_incra(x, 1);                                          \
+        else if ( (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) <           \
+                  (PERFC_LAST_ ## x - PERFC_ ## x - 2) )                \
+            perfc_incra(x, (((v) - 2) / PERFC_ ## x ## _BUCKET_SIZE) + 2); \
+        else                                                            \
+            perfc_incra(x, PERFC_LAST_ ## x - PERFC_ ## x);             \
     } while ( 0 )
 #else
-#define perfc_incr_histo(_x,_v,_n) ((void)0)
+#define perfc_incr_histo(x,v) ((void)0)
 #endif
 
 struct xen_sysctl_perfc_op;
@@ -110,22 +102,20 @@ int perfc_control(struct xen_sysctl_perf
 #else /* PERF_COUNTERS */
 
 #define perfc_value(x)    (0)
-#define perfc_valuec(x)   (0)
 #define perfc_valuea(x,y) (0)
 #define perfc_set(x,v)    ((void)0)
-#define perfc_setc(x,v)   ((void)0)
 #define perfc_seta(x,y,v) ((void)0)
 #define perfc_incr(x)     ((void)0)
 #define perfc_decr(x)     ((void)0)
-#define perfc_incrc(x)    ((void)0)
-#define perfc_decrc(x)    ((void)0)
 #define perfc_incra(x,y)  ((void)0)
 #define perfc_decra(x,y)  ((void)0)
 #define perfc_add(x,y)    ((void)0)
-#define perfc_addc(x,y)   ((void)0)
 #define perfc_adda(x,y,z) ((void)0)
 #define perfc_incr_histo(x,y,z) ((void)0)
 
 #endif /* PERF_COUNTERS */
 
+/* Compatibility: This should go away once all users got converted. */
+#define perfc_incrc     perfc_incr
+
 #endif /* __XEN_PERFC_H__ */
diff -r 3375391fb0c9 -r 96f167771979 xen/include/xen/perfc_defn.h
--- a/xen/include/xen/perfc_defn.h      Tue Mar 27 16:23:52 2007 +0100
+++ b/xen/include/xen/perfc_defn.h      Tue Mar 27 16:35:37 2007 +0100
@@ -5,6 +5,9 @@
 #include <asm/perfc_defn.h>
 
 PERFCOUNTER_ARRAY(hypercalls,           "hypercalls", NR_hypercalls)
+
+PERFCOUNTER(calls_to_multicall,         "calls to multicall")
+PERFCOUNTER(calls_from_multicall,       "calls from multicall")
 
 PERFCOUNTER_CPU(irqs,                   "#interrupts")
 PERFCOUNTER_CPU(ipis,                   "#IPIs")

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] xen: Make all performance counter per-cpu, avoiding the need to update, Xen patchbot-unstable <=