WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] RE: xenpm: provide core/package cstate residencies

To: "Wei, Gang" <gang.wei@xxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] RE: xenpm: provide core/package cstate residencies
From: "Wei, Gang" <gang.wei@xxxxxxxxx>
Date: Mon, 12 Jul 2010 23:22:07 +0800
Accept-language: zh-CN, en-US
Acceptlanguage: zh-CN, en-US
Cc: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, "Wei, Gang" <gang.wei@xxxxxxxxx>
Delivery-date: Mon, 12 Jul 2010 08:23:30 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <F26D193E20BBDC42A43B611D1BDEDE7113913C8C50@xxxxxxxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <F26D193E20BBDC42A43B611D1BDEDE7113913C8C50@xxxxxxxxxxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: Acshsc/VO7ereUpQSbq6nuh7lUSyMAAIr3Mw
Thread-topic: xenpm: provide core/package cstate residencies
Resend it.

=======
xenpm: provide core/package cstate residencies

According to Intel 64 and IA32 Architectures SDM 3B Appendix B, Intel
Nehalem/Westmere processors provide hardware MSRs that report the core/package
C-state residencies. Extend the sysctl_get_pmstat interface to pass the
core/package C-state residencies, and modify xenpm to output that information.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>

diff -r 1af6303f103c tools/libxc/xc_pm.c
--- a/tools/libxc/xc_pm.c       Mon Jul 12 14:12:54 2010 +0800
+++ b/tools/libxc/xc_pm.c       Mon Jul 12 22:16:04 2010 +0800
@@ -152,6 +152,11 @@ int xc_pm_get_cxstat(xc_interface *xch, 
     cxpt->nr = sysctl.u.get_pmstat.u.getcx.nr;
     cxpt->last = sysctl.u.get_pmstat.u.getcx.last;
     cxpt->idle_time = sysctl.u.get_pmstat.u.getcx.idle_time;
+    cxpt->pc3 = sysctl.u.get_pmstat.u.getcx.pc3;
+    cxpt->pc6 = sysctl.u.get_pmstat.u.getcx.pc6;
+    cxpt->pc7 = sysctl.u.get_pmstat.u.getcx.pc7;
+    cxpt->cc3 = sysctl.u.get_pmstat.u.getcx.cc3;
+    cxpt->cc6 = sysctl.u.get_pmstat.u.getcx.cc6;
 
 unlock_3:
     unlock_pages(cxpt->residencies, max_cx * sizeof(uint64_t));
diff -r 1af6303f103c tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jul 12 14:12:54 2010 +0800
+++ b/tools/libxc/xenctrl.h     Mon Jul 12 22:16:04 2010 +0800
@@ -1393,6 +1393,11 @@ struct xc_cx_stat {
     uint64_t idle_time;    /* idle time from boot */
     uint64_t *triggers;    /* Cx trigger counts */
     uint64_t *residencies; /* Cx residencies */
+    uint64_t pc3;
+    uint64_t pc6;
+    uint64_t pc7;
+    uint64_t cc3;
+    uint64_t cc6;
 };
 typedef struct xc_cx_stat xc_cx_stat_t;
 
diff -r 1af6303f103c tools/misc/xenpm.c
--- a/tools/misc/xenpm.c        Mon Jul 12 14:12:54 2010 +0800
+++ b/tools/misc/xenpm.c        Mon Jul 12 22:16:04 2010 +0800
@@ -15,6 +15,7 @@
  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  * Place - Suite 330, Boston, MA 02111-1307 USA.
  */
+#define MAX_NR_CPU 512
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -91,6 +92,13 @@ static void print_cxstat(int cpuid, stru
         printf("                       residency  [%020"PRIu64" ms]\n",
                cxstat->residencies[i]/1000000UL);
     }
+    printf("pc3                  : [%020"PRIu64" ms]\n"
+           "pc6                  : [%020"PRIu64" ms]\n"
+           "pc7                  : [%020"PRIu64" ms]\n",
+           cxstat->pc3/1000000UL, cxstat->pc6/1000000UL, cxstat->pc7/1000000UL);
+    printf("cc3                  : [%020"PRIu64" ms]\n"
+           "cc6                  : [%020"PRIu64" ms]\n",
+           cxstat->cc3/1000000UL, cxstat->cc6/1000000UL);
     printf("\n");
 }
 
@@ -306,9 +314,13 @@ static uint64_t *sum, *sum_cx, *sum_px;
 
 static void signal_int_handler(int signo)
 {
-    int i, j;
+    int i, j, k, ret;
     struct timeval tv;
     int cx_cap = 0, px_cap = 0;
+    uint32_t cpu_to_core[MAX_NR_CPU];
+    uint32_t cpu_to_socket[MAX_NR_CPU];
+    uint32_t cpu_to_node[MAX_NR_CPU];
+    xc_topologyinfo_t info = { 0 };
 
     if ( gettimeofday(&tv, NULL) == -1 )
     {
@@ -369,6 +381,93 @@ static void signal_int_handler(int signo
                     pxstat_start[i].pt[j].residency;
                 printf("  P%d\t%"PRIu64"\t(%5.2f%%)\n", j,
                         res / 1000000UL, 100UL * res / (double)sum_px[i]);
+            }
+        }
+    }
+
+    set_xen_guest_handle(info.cpu_to_core, cpu_to_core);
+    set_xen_guest_handle(info.cpu_to_socket, cpu_to_socket);
+    set_xen_guest_handle(info.cpu_to_node, cpu_to_node);
+    info.max_cpu_index = MAX_NR_CPU - 1;
+
+    ret = xc_topologyinfo(xc_handle, &info);
+    if ( !ret )
+    {
+        uint32_t socket_ids[MAX_NR_CPU];
+        uint32_t core_ids[MAX_NR_CPU];
+        uint32_t socket_nr = 0;
+        uint32_t core_nr = 0;
+
+        if ( info.max_cpu_index > MAX_NR_CPU - 1 )
+            info.max_cpu_index = MAX_NR_CPU - 1;
+        /* check validity */
+        for ( i = 0; i <= info.max_cpu_index; i++ )
+        {
+            if ( cpu_to_core[i] == INVALID_TOPOLOGY_ID ||
+                 cpu_to_socket[i] == INVALID_TOPOLOGY_ID )
+                break;
+        }
+        if ( i > info.max_cpu_index )
+        {
+            /* find socket nr & core nr per socket */
+            for ( i = 0; i <= info.max_cpu_index; i++ )
+            {
+                for ( j = 0; j < socket_nr; j++ )
+                    if ( cpu_to_socket[i] == socket_ids[j] )
+                        break;
+                if ( j == socket_nr )
+                {
+                    socket_ids[j] = cpu_to_socket[i];
+                    socket_nr++;
+                }
+
+                for ( j = 0; j < core_nr; j++ )
+                    if ( cpu_to_core[i] == core_ids[j] )
+                        break;
+                if ( j == core_nr )
+                {
+                    core_ids[j] = cpu_to_core[i];
+                    core_nr++;
+                }
+            }
+
+            /* print out CC? and PC? */
+            for ( i = 0; i < socket_nr; i++ )
+            {
+                uint64_t res;
+                for ( j = 0; j <= info.max_cpu_index; j++ )
+                {
+                    if ( cpu_to_socket[j] == socket_ids[i] )
+                        break;
+                }
+                printf("Socket %d\n", socket_ids[i]);
+                res = cxstat_end[j].pc3 - cxstat_start[j].pc3;
+                printf("\tPC3\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL, 
+                       100UL * res / (double)sum_cx[j]);
+                res = cxstat_end[j].pc6 - cxstat_start[j].pc6;
+                printf("\tPC6\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL, 
+                       100UL * res / (double)sum_cx[j]);
+                res = cxstat_end[j].pc7 - cxstat_start[j].pc7;
+                printf("\tPC7\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL, 
+                       100UL * res / (double)sum_cx[j]);
+                for ( k = 0; k < core_nr; k++ )
+                {
+                    for ( j = 0; j <= info.max_cpu_index; j++ )
+                    {
+                        if ( cpu_to_socket[j] == socket_ids[i] &&
+                             cpu_to_core[j] == core_ids[k] )
+                            break;
+                    }
+                    printf("\t Core %d CPU %d\n", core_ids[k], j);
+                    res = cxstat_end[j].cc3 - cxstat_start[j].cc3;
+                    printf("\t\tCC3\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL,
+                           100UL * res / (double)sum_cx[j]);
+                    res = cxstat_end[j].cc6 - cxstat_start[j].cc6;
+                    printf("\t\tCC6\t%"PRIu64" ms\t%.2f%%\n",  res / 1000000UL,
+                           100UL * res / (double)sum_cx[j]);
+                    printf("\n");
+
+                }
             }
         }
         printf("  Avg freq\t%d\tKHz\n", avgfreq[i]);
@@ -833,8 +932,6 @@ out:
     fprintf(stderr, "failed to set governor name\n");
 }
 
-#define MAX_NR_CPU 512
-
 void cpu_topology_func(int argc, char *argv[])
 {
     uint32_t cpu_to_core[MAX_NR_CPU];
diff -r 1af6303f103c xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c      Mon Jul 12 14:12:54 2010 +0800
+++ b/xen/arch/x86/acpi/cpu_idle.c      Mon Jul 12 22:58:25 2010 +0800
@@ -55,6 +55,14 @@
 
 /*#define DEBUG_PM_CX*/
 
+#define GET_HW_RES_IN_NS(msr, val) \
+    do { rdmsrl(msr, val); val = tsc_ticks2ns(val); } while( 0 )
+#define GET_PC3_RES(val)  GET_HW_RES_IN_NS(0x3F8, val)
+#define GET_PC6_RES(val)  GET_HW_RES_IN_NS(0x3F9, val)
+#define GET_PC7_RES(val)  GET_HW_RES_IN_NS(0x3FA, val)
+#define GET_CC3_RES(val)  GET_HW_RES_IN_NS(0x3FC, val)
+#define GET_CC6_RES(val)  GET_HW_RES_IN_NS(0x3FD, val)
+
 static void lapic_timer_nop(void) { }
 static void (*lapic_timer_off)(void);
 static void (*lapic_timer_on)(void);
@@ -75,6 +83,63 @@ boolean_param("lapic_timer_c2_ok", local
 boolean_param("lapic_timer_c2_ok", local_apic_timer_c2_ok);
 
 static struct acpi_processor_power *__read_mostly processor_powers[NR_CPUS];
+
+struct hw_residencies
+{
+    uint64_t pc3;
+    uint64_t pc6;
+    uint64_t pc7;
+    uint64_t cc3;
+    uint64_t cc6;
+};
+
+static void do_get_hw_residencies(void *arg)
+{
+    struct cpuinfo_x86 *c = &current_cpu_data;
+    struct hw_residencies *hw_res = (struct hw_residencies *)arg;
+
+    if ( c->x86_vendor != X86_VENDOR_INTEL || c->x86 != 6 )
+        return;
+
+    switch ( c->x86_model )
+    {
+    /* Nehalem */
+    case 0x1A:
+    case 0x1E:
+    case 0x1F:
+    case 0x2E:
+    /* Westmere */
+    case 0x25:
+    case 0x2C:
+        GET_PC3_RES(hw_res->pc3);
+        GET_PC6_RES(hw_res->pc6);
+        GET_PC7_RES(hw_res->pc7);
+        GET_CC3_RES(hw_res->cc3);
+        GET_CC6_RES(hw_res->cc6);
+        break;
+    }
+}
+
+static void get_hw_residencies(uint32_t cpu, struct hw_residencies *hw_res)
+{
+    if ( smp_processor_id() == cpu )
+        do_get_hw_residencies((void *)hw_res);
+    else
+        on_selected_cpus(cpumask_of(cpu),
+                         do_get_hw_residencies, (void *)hw_res, 1);
+}
+
+static void print_hw_residencies(uint32_t cpu)
+{
+    struct hw_residencies hw_res = {0};
+
+    get_hw_residencies(cpu, &hw_res);
+
+    printk("PC3[%"PRId64"] PC6[%"PRId64"] PC7[%"PRId64"]\n",
+           hw_res.pc3, hw_res.pc6, hw_res.pc7);
+    printk("CC3[%"PRId64"] CC6[%"PRId64"]\n",
+           hw_res.cc3, hw_res.cc6);
+}
 
 static char* acpi_cstate_method_name[] =
 {
@@ -113,6 +178,7 @@ static void print_acpi_power(uint32_t cp
     printk("    C0:\tusage[%08d] duration[%"PRId64"]\n",
            idle_usage, NOW() - idle_res);
 
+    print_hw_residencies(cpu);
 }
 
 static void dump_cx(unsigned char key)
@@ -933,6 +999,7 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
     const struct acpi_processor_power *power = processor_powers[cpuid];
     uint64_t usage, res, idle_usage = 0, idle_res = 0;
     int i;
+    struct hw_residencies hw_res = {0};
 
     if ( power == NULL )
     {
@@ -965,6 +1032,14 @@ int pmstat_get_cx_stat(uint32_t cpuid, s
             return -EFAULT;
     }
 
+    get_hw_residencies(cpuid, &hw_res);
+
+    stat->pc3 = hw_res.pc3;
+    stat->pc6 = hw_res.pc6;
+    stat->pc7 = hw_res.pc7;
+    stat->cc3 = hw_res.cc3;
+    stat->cc6 = hw_res.cc6;
+
     return 0;
 }
 
diff -r 1af6303f103c xen/arch/x86/time.c
--- a/xen/arch/x86/time.c       Mon Jul 12 14:12:54 2010 +0800
+++ b/xen/arch/x86/time.c       Mon Jul 12 22:16:04 2010 +0800
@@ -785,6 +785,13 @@ s_time_t get_s_time(void)
     now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
 
     return now;
+}
+
+uint64_t tsc_ticks2ns(uint64_t ticks)
+{
+    struct cpu_time *t = &this_cpu(cpu_time);
+
+    return scale_delta(ticks, &t->tsc_scale);
 }
 
 /* Explicitly OR with 1 just in case version number gets out of sync. */
diff -r 1af6303f103c xen/include/asm-x86/time.h
--- a/xen/include/asm-x86/time.h        Mon Jul 12 14:12:54 2010 +0800
+++ b/xen/include/asm-x86/time.h        Mon Jul 12 22:16:04 2010 +0800
@@ -56,6 +56,8 @@ uint64_t acpi_pm_tick_to_ns(uint64_t tic
 uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
 uint64_t ns_to_acpi_pm_tick(uint64_t ns);
 
+uint64_t tsc_ticks2ns(uint64_t ticks);
+
 void pv_soft_rdtsc(struct vcpu *v, struct cpu_user_regs *regs, int rdtscp);
 u64 gtime_to_gtsc(struct domain *d, u64 tsc);
 
diff -r 1af6303f103c xen/include/public/sysctl.h
--- a/xen/include/public/sysctl.h       Mon Jul 12 14:12:54 2010 +0800
+++ b/xen/include/public/sysctl.h       Mon Jul 12 22:16:04 2010 +0800
@@ -223,6 +223,11 @@ struct pm_cx_stat {
     uint64_aligned_t idle_time;                 /* idle time from boot */
     XEN_GUEST_HANDLE_64(uint64) triggers;    /* Cx trigger counts */
     XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
+    uint64_aligned_t pc3;
+    uint64_aligned_t pc6;
+    uint64_aligned_t pc7;
+    uint64_aligned_t cc3;
+    uint64_aligned_t cc6;
 };
 
 struct xen_sysctl_get_pmstat {

Attachment: xenpm-package-cstate-v2.patch
Description: xenpm-package-cstate-v2.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel