To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] Add basic acpi C-states based cpu idle power mgmt in xen for x86.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Thu, 01 May 2008 03:00:20 -0700
Delivery-date: Thu, 01 May 2008 07:47:23 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1209634801 -3600
# Node ID 5bb9093eb0e9abfcf8537148ba088fd2604e5737
# Parent  ad55c06c9bbc31e4e3db2944f3a1fcbf842bd4aa
Add basic acpi C-states based cpu idle power mgmt in xen for x86.

It includes:
 1. A hypercall definition (XENPF_set_processor_pminfo) for passing ACPI
    C-state info from dom0 to Xen; a usage sketch follows the diffstat below.
 2. C1/C2 support.
 3. MWAIT-based C-state entry, as well as the legacy I/O-port method.
 4. The "ladder" promotion/demotion policy from the Linux kernel; a
    simplified sketch follows the patch.

Much of the code and many of the ideas come from Linux.

Signed-off-by: Wei Gang <gang.wei@xxxxxxxxx>
---
 xen/arch/x86/acpi/Makefile               |    2 
 xen/arch/x86/acpi/cpu_idle.c             |  690 +++++++++++++++++++++++++++++++
 xen/arch/x86/domain.c                    |    5 
 xen/arch/x86/platform_hypercall.c        |   23 +
 xen/arch/x86/x86_64/Makefile             |    2 
 xen/arch/x86/x86_64/cpu_idle.c           |  128 +++++
 xen/arch/x86/x86_64/platform_hypercall.c |    4 
 xen/include/public/platform.h            |   65 ++
 xen/include/xlat.lst                     |    5 
 9 files changed, 922 insertions(+), 2 deletions(-)
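
For orientation before the diff: dom0 is expected to parse the ACPI _CST data
and hand it to Xen through the new XENPF_set_processor_pminfo platform op. The
fragment below is a minimal sketch of what such a call could look like, not
code from this patch: HYPERVISOR_platform_op(), the include path and the
literal register values are assumptions about the dom0 side, while the
structure and constant names (xen_platform_op, XEN_PM_CX, xen_processor_cx_t,
...) come from the platform.h hunk further down.

    /*
     * Hypothetical dom0-side sketch: pass a two-entry _CST (C1 + C2) to Xen.
     * HYPERVISOR_platform_op() and the literal values are illustrative
     * assumptions; a real caller fills the structures from the parsed _CST.
     */
    #include <xen/interface/platform.h>

    static xen_processor_cx_t cx_states[2];

    static int push_cx_info(uint32_t acpi_id)
    {
        struct xen_platform_op op = { 0 };

        /* C1: FFH entry (native HLT/MWAIT), no I/O trigger register. */
        cx_states[0].type         = 1;              /* ACPI C1 */
        cx_states[0].latency      = 1;
        cx_states[0].reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;

        /* C2: entered by reading an I/O port taken from the _CST package. */
        cx_states[1].type         = 2;              /* ACPI C2 */
        cx_states[1].latency      = 20;
        cx_states[1].reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
        cx_states[1].reg.address  = 0x814;          /* example P_LVL2 port */

        op.cmd               = XENPF_set_processor_pminfo;
        op.interface_version = XENPF_INTERFACE_VERSION;
        op.u.set_pminfo.id   = acpi_id;             /* ACPI ID, not APIC ID */
        op.u.set_pminfo.type = XEN_PM_CX;
        op.u.set_pminfo.power.count = 2;
        set_xen_guest_handle(op.u.set_pminfo.power.states, cx_states);

        return HYPERVISOR_platform_op(&op);
    }

On the hypervisor side, do_platform_op() dispatches XEN_PM_CX to
set_cx_pminfo(), which validates each entry in check_cx(), stores it in the
per-CPU acpi_processor_power array and, once CPU0's data has arrived, installs
acpi_processor_idle() as the pm_idle handler.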

diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile        Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/acpi/Makefile        Thu May 01 10:40:01 2008 +0100
@@ -1,2 +1,2 @@ obj-y += boot.o
 obj-y += boot.o
-obj-y += power.o suspend.o wakeup_prot.o
+obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/acpi/cpu_idle.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/acpi/cpu_idle.c      Thu May 01 10:40:01 2008 +0100
@@ -0,0 +1,690 @@
+/*
+ * cpu_idle - xen idle state module derived from Linux 
+ *            drivers/acpi/processor_idle.c & 
+ *            arch/x86/kernel/acpi/cstate.c
+ *
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@xxxxxxxx>
+ *  Copyright (C) 2004  Anil S Keshavamurthy <anil.s.keshavamurthy@xxxxxxxxx>
+ *                      - Added processor hotplug support
+ *  Copyright (C) 2005  Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
+ *                      - Added support for C3 on SMP
+ *  Copyright (C) 2007, 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/types.h>
+#include <xen/acpi.h>
+#include <xen/smp.h>
+#include <asm/cache.h>
+#include <asm/io.h>
+#include <xen/guest_access.h>
+#include <public/platform.h>
+#include <asm/processor.h>
+#include <xen/keyhandler.h>
+
+#define DEBUG_PM_CX
+
+#define US_TO_PM_TIMER_TICKS(t)     ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+#define C2_OVERHEAD         4   /* 1us (3.579 ticks per us) */
+#define C3_OVERHEAD         4   /* 1us (3.579 ticks per us) */
+
+#define ACPI_PROCESSOR_MAX_POWER        8
+#define ACPI_PROCESSOR_MAX_C2_LATENCY   100
+#define ACPI_PROCESSOR_MAX_C3_LATENCY   1000
+
+extern u32 pmtmr_ioport;
+extern void (*pm_idle) (void);
+
+static void (*pm_idle_save) (void) __read_mostly;
+unsigned int max_cstate __read_mostly = 2;
+integer_param("max_cstate", max_cstate);
+
+struct acpi_processor_cx;
+
+struct acpi_processor_cx_policy
+{
+    u32 count;
+    struct acpi_processor_cx *state;
+    struct
+    {
+        u32 time;
+        u32 ticks;
+        u32 count;
+        u32 bm;
+    } threshold;
+};
+
+struct acpi_processor_cx
+{
+    u8 valid;
+    u8 type;
+    u32 address;
+    u8 space_id;
+    u32 latency;
+    u32 latency_ticks;
+    u32 power;
+    u32 usage;
+    u64 time;
+    struct acpi_processor_cx_policy promotion;
+    struct acpi_processor_cx_policy demotion;
+};
+
+struct acpi_processor_power
+{
+    struct acpi_processor_cx *state;
+    u64 bm_check_timestamp;
+    u32 default_state;
+    u32 bm_activity;
+    u32 count;
+    struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
+};
+
+static struct acpi_processor_power processor_powers[NR_CPUS];
+
+static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
+{
+    uint32_t i;
+
+    printk("saved cpu%d cx acpi info:\n", cpu);
+    printk("\tcurrent state is C%d\n", (power->state)?power->state->type:-1);
+    printk("\tbm_check_timestamp = %"PRId64"\n", power->bm_check_timestamp);
+    printk("\tdefault_state = %d\n", power->default_state);
+    printk("\tbm_activity = 0x%08x\n", power->bm_activity);
+    printk("\tcount = %d\n", power->count);
+    
+    for ( i = 0; i < power->count; i++ )
+    {
+        printk("\tstates[%d]:\n", i);
+        printk("\t\tvalid   = %d\n", power->states[i].valid);
+        printk("\t\ttype    = %d\n", power->states[i].type);
+        printk("\t\taddress = 0x%x\n", power->states[i].address);
+        printk("\t\tspace_id = 0x%x\n", power->states[i].space_id);
+        printk("\t\tlatency = %d\n", power->states[i].latency);
+        printk("\t\tpower   = %d\n", power->states[i].power);
+        printk("\t\tlatency_ticks = %d\n", power->states[i].latency_ticks);
+        printk("\t\tusage   = %d\n", power->states[i].usage);
+        printk("\t\ttime    = %"PRId64"\n", power->states[i].time);
+
+        printk("\t\tpromotion policy:\n");
+        printk("\t\t\tcount    = %d\n", power->states[i].promotion.count);
+        printk("\t\t\tstate    = C%d\n",
+               (power->states[i].promotion.state) ? 
+               power->states[i].promotion.state->type : -1);
+        printk("\t\t\tthreshold.time = %d\n", 
power->states[i].promotion.threshold.time);
+        printk("\t\t\tthreshold.ticks = %d\n", 
power->states[i].promotion.threshold.ticks);
+        printk("\t\t\tthreshold.count = %d\n", 
power->states[i].promotion.threshold.count);
+        printk("\t\t\tthreshold.bm = %d\n", 
power->states[i].promotion.threshold.bm);
+
+        printk("\t\tdemotion policy:\n");
+        printk("\t\t\tcount    = %d\n", power->states[i].demotion.count);
+        printk("\t\t\tstate    = C%d\n",
+               (power->states[i].demotion.state) ? 
+               power->states[i].demotion.state->type : -1);
+        printk("\t\t\tthreshold.time = %d\n", 
power->states[i].demotion.threshold.time);
+        printk("\t\t\tthreshold.ticks = %d\n", 
power->states[i].demotion.threshold.ticks);
+        printk("\t\t\tthreshold.count = %d\n", 
power->states[i].demotion.threshold.count);
+        printk("\t\t\tthreshold.bm = %d\n", 
power->states[i].demotion.threshold.bm);
+    }
+}
+
+static void dump_cx(unsigned char key)
+{
+    for( int i = 0; i < num_online_cpus(); i++ )
+        print_acpi_power(i, &processor_powers[i]);
+}
+
+static int __init cpu_idle_key_init(void)
+{
+    register_keyhandler(
+        'c', dump_cx,        "dump cx structures");
+    return 0;
+}
+__initcall(cpu_idle_key_init);
+
+static inline u32 ticks_elapsed(u32 t1, u32 t2)
+{
+    if ( t2 >= t1 )
+        return (t2 - t1);
+    else
+        return ((0xFFFFFFFF - t1) + t2);
+}
+
+static void acpi_processor_power_activate(struct acpi_processor_power *power,
+                                          struct acpi_processor_cx *new)
+{
+    struct acpi_processor_cx *old;
+
+    if ( !power || !new )
+        return;
+
+    old = power->state;
+
+    if ( old )
+        old->promotion.count = 0;
+    new->demotion.count = 0;
+
+    power->state = new;
+
+    return;
+}
+
+static void acpi_safe_halt(void)
+{
+    smp_mb__after_clear_bit();
+    safe_halt();
+}
+
+#define MWAIT_ECX_INTERRUPT_BREAK      (0x1)
+
+static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
+{
+    __monitor((void *)current, 0, 0);
+    smp_mb();
+    __mwait(eax, ecx);
+}
+
+static void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
+{
+    mwait_idle_with_hints(cx->address, MWAIT_ECX_INTERRUPT_BREAK);
+}
+
+static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
+{
+    if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE )
+    {
+        /* Call into architectural FFH based C-state */
+        acpi_processor_ffh_cstate_enter(cx);
+    }
+    else
+    {
+        int unused;
+        /* IO port based C-state */
+        inb(cx->address);
+        /* Dummy wait op - must do something useless after P_LVL2 read
+           because chipsets cannot guarantee that STPCLK# signal
+           gets asserted in time to freeze execution properly. */
+        unused = inl(pmtmr_ioport);
+    }
+}
+
+static void acpi_processor_idle(void)
+{
+    struct acpi_processor_power *power = NULL;
+    struct acpi_processor_cx *cx = NULL;
+    struct acpi_processor_cx *next_state = NULL;
+    int sleep_ticks = 0;
+    u32 t1, t2 = 0;
+
+    power = &processor_powers[smp_processor_id()];
+
+    /*
+     * Interrupts must be disabled during bus mastering calculations and
+     * for C2/C3 transitions.
+     */
+    local_irq_disable();
+    cx = power->state;
+    if ( !cx )
+    {
+        if ( pm_idle_save )
+        {
+            printk(XENLOG_DEBUG "call pm_idle_save()\n");
+            pm_idle_save();
+        }
+        else
+        {
+            printk(XENLOG_DEBUG "call acpi_safe_halt()\n");
+            acpi_safe_halt();
+        }
+        return;
+    }
+
+    /*
+     * Sleep:
+     * ------
+     * Invoke the current Cx state to put the processor to sleep.
+     */
+    if ( cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3 )
+        smp_mb__after_clear_bit();
+
+    switch ( cx->type )
+    {
+    case ACPI_STATE_C1:
+        /*
+         * Invoke C1.
+         * Use the appropriate idle routine, the one that would
+         * be used without acpi C-states.
+         */
+        if ( pm_idle_save )
+            pm_idle_save();
+        else 
+            acpi_safe_halt();
+
+        /*
+         * TBD: Can't get time duration while in C1, as resumes
+         *      go to an ISR rather than here.  Need to instrument
+         *      base interrupt handler.
+         */
+        sleep_ticks = 0xFFFFFFFF;
+        break;
+
+    case ACPI_STATE_C2:
+        /* Get start time (ticks) */
+        t1 = inl(pmtmr_ioport);
+        /* Invoke C2 */
+        acpi_idle_do_entry(cx);
+        /* Get end time (ticks) */
+        t2 = inl(pmtmr_ioport);
+
+        /* Re-enable interrupts */
+        local_irq_enable();
+        /* Compute time (ticks) that we were actually asleep */
+        sleep_ticks =
+            ticks_elapsed(t1, t2) - cx->latency_ticks - C2_OVERHEAD;
+        break;
+    default:
+        local_irq_enable();
+        return;
+    }
+
+    cx->usage++;
+    if ( (cx->type != ACPI_STATE_C1) && (sleep_ticks > 0) )
+        cx->time += sleep_ticks;
+
+    next_state = power->state;
+
+    /*
+     * Promotion?
+     * ----------
+     * Track the number of longs (time asleep is greater than threshold)
+     * and promote when the count threshold is reached.  Note that bus
+     * mastering activity may prevent promotions.
+     * Do not promote above max_cstate.
+     */
+    if ( cx->promotion.state &&
+         ((cx->promotion.state - power->states) <= max_cstate) )
+    {
+        if ( sleep_ticks > cx->promotion.threshold.ticks )
+        {
+            cx->promotion.count++;
+            cx->demotion.count = 0;
+            if ( cx->promotion.count >= cx->promotion.threshold.count )
+            {
+                next_state = cx->promotion.state;
+                goto end;
+            }
+        }
+    }
+
+    /*
+     * Demotion?
+     * ---------
+     * Track the number of shorts (time asleep is less than time threshold)
+     * and demote when the usage threshold is reached.
+     */
+    if ( cx->demotion.state )
+    {
+        if ( sleep_ticks < cx->demotion.threshold.ticks )
+        {
+            cx->demotion.count++;
+            cx->promotion.count = 0;
+            if ( cx->demotion.count >= cx->demotion.threshold.count )
+            {
+                next_state = cx->demotion.state;
+                goto end;
+            }
+        }
+    }
+
+end:
+    /*
+     * Demote if current state exceeds max_cstate
+     */
+    if ( (power->state - power->states) > max_cstate )
+    {
+        if ( cx->demotion.state )
+            next_state = cx->demotion.state;
+    }
+
+    /*
+     * New Cx State?
+     * -------------
+     * If we're going to start using a new Cx state we must clean up
+     * from the previous and prepare to use the new.
+     */
+    if ( next_state != power->state )
+        acpi_processor_power_activate(power, next_state);
+}
+
+static int acpi_processor_set_power_policy(struct acpi_processor_power *power)
+{
+    unsigned int i;
+    unsigned int state_is_set = 0;
+    struct acpi_processor_cx *lower = NULL;
+    struct acpi_processor_cx *higher = NULL;
+    struct acpi_processor_cx *cx;
+
+    if ( !power )
+        return -EINVAL;
+
+    /*
+     * This function sets the default Cx state policy (OS idle handler).
+     * Our scheme is to promote quickly to C2 but more conservatively
+     * to C3.  We're favoring C2  for its characteristics of low latency
+     * (quick response), good power savings, and ability to allow bus
+     * mastering activity.  Note that the Cx state policy is completely
+     * customizable and can be altered dynamically.
+     */
+
+    /* startup state */
+    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
+    {
+        cx = &power->states[i];
+        if ( !cx->valid )
+            continue;
+
+        if ( !state_is_set )
+            power->state = cx;
+        state_is_set++;
+        break;
+    }
+
+    if ( !state_is_set )
+        return -ENODEV;
+
+    /* demotion */
+    for ( i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++ )
+    {
+        cx = &power->states[i];
+        if ( !cx->valid )
+            continue;
+
+        if ( lower )
+        {
+            cx->demotion.state = lower;
+            cx->demotion.threshold.ticks = cx->latency_ticks;
+            cx->demotion.threshold.count = 1;
+        }
+
+        lower = cx;
+    }
+
+    /* promotion */
+    for ( i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i-- )
+    {
+        cx = &power->states[i];
+        if ( !cx->valid )
+            continue;
+
+        if ( higher )
+        {
+            cx->promotion.state = higher;
+            cx->promotion.threshold.ticks = cx->latency_ticks;
+            if ( cx->type >= ACPI_STATE_C2 )
+                cx->promotion.threshold.count = 4;
+            else
+                cx->promotion.threshold.count = 10;
+        }
+
+        higher = cx;
+    }
+
+    return 0;
+}
+
+static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
+{
+    memset(acpi_power, 0, sizeof(*acpi_power));
+
+    acpi_power->states[ACPI_STATE_C1].type = ACPI_STATE_C1;
+
+    acpi_power->states[ACPI_STATE_C0].valid = 1;
+    acpi_power->states[ACPI_STATE_C1].valid = 1;
+
+    acpi_power->count = 2;
+
+    return 0;
+}
+
+#define CPUID_MWAIT_LEAF (5)
+#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1)
+#define CPUID5_ECX_INTERRUPT_BREAK      (0x2)
+
+#define MWAIT_ECX_INTERRUPT_BREAK       (0x1)
+
+#define MWAIT_SUBSTATE_MASK (0xf)
+#define MWAIT_SUBSTATE_SIZE (4)
+
+static int acpi_processor_ffh_cstate_probe(xen_processor_cx_t *cx)
+{
+    struct cpuinfo_x86 *c = &current_cpu_data;
+    unsigned int eax, ebx, ecx, edx;
+    unsigned int edx_part;
+    unsigned int cstate_type; /* C-state type and not ACPI C-state type */
+    unsigned int num_cstate_subtype;
+
+    if ( c->cpuid_level < CPUID_MWAIT_LEAF )
+    {
+        printk(XENLOG_INFO "MWAIT leaf not supported by cpuid\n");
+        return -EFAULT;
+    }
+
+    cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+    printk(XENLOG_DEBUG "cpuid.MWAIT[.eax=%x, .ebx=%x, .ecx=%x, .edx=%x]\n",
+           eax, ebx, ecx, edx);
+
+    /* Check whether this particular cx_type (in CST) is supported or not */
+    cstate_type = (cx->reg.address >> MWAIT_SUBSTATE_SIZE) + 1;
+    edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
+    num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
+
+    if ( num_cstate_subtype < (cx->reg.address & MWAIT_SUBSTATE_MASK) )
+        return -EFAULT;
+
+    /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
+    if ( !(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+         !(ecx & CPUID5_ECX_INTERRUPT_BREAK) )
+        return -EFAULT;
+
+    printk(XENLOG_INFO "Monitor-Mwait will be used to enter C-%d state\n", 
cx->type);
+    return 0;
+}
+
+#define VENDOR_INTEL                   (1)
+#define NATIVE_CSTATE_BEYOND_HALT      (2)
+
+static int check_cx(xen_processor_cx_t *cx)
+{
+    if ( cx == NULL )
+        return -EINVAL;
+
+    switch ( cx->reg.space_id )
+    {
+    case ACPI_ADR_SPACE_SYSTEM_IO:
+        if ( cx->reg.address == 0 )
+            return -EINVAL;
+        break;
+
+    case ACPI_ADR_SPACE_FIXED_HARDWARE:
+        if ( cx->type > ACPI_STATE_C1 )
+        {
+            if ( cx->reg.bit_width != VENDOR_INTEL || 
+                 cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
+                return -EINVAL;
+
+            /* assume all logical cpu has the same support for mwait */
+            if ( acpi_processor_ffh_cstate_probe(cx) )
+                return -EFAULT;
+        }
+        break;
+
+    default:
+        return -ENODEV;
+    }
+
+    return 0;
+}
+
+static int set_cx(struct acpi_processor_power *acpi_power,
+                  xen_processor_cx_t *xen_cx)
+{
+    struct acpi_processor_cx *cx;
+
+    /* skip unsupported acpi cstate */
+    if ( check_cx(xen_cx) )
+        return -EFAULT;
+
+    cx = &acpi_power->states[xen_cx->type];
+    if ( !cx->valid )
+        acpi_power->count++;
+
+    cx->valid    = 1;
+    cx->type     = xen_cx->type;
+    cx->address  = xen_cx->reg.address;
+    cx->space_id = xen_cx->reg.space_id;
+    cx->latency  = xen_cx->latency;
+    cx->power    = xen_cx->power;
+    
+    cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+
+    return 0;   
+}
+
+static int get_cpu_id(u8 acpi_id)
+{
+    int i;
+    u8 apic_id;
+
+    apic_id = x86_acpiid_to_apicid[acpi_id];
+    if ( apic_id == 0xff )
+        return -1;
+
+    for ( i = 0; i < NR_CPUS; i++ )
+    {
+        if ( apic_id == x86_cpu_to_apicid[i] )
+            return i;
+    }
+
+    return -1;
+}
+
+#ifdef DEBUG_PM_CX
+static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
+{
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
+    xen_processor_cx_t  state;
+    XEN_GUEST_HANDLE(xen_processor_csd_t) csd;
+    xen_processor_csd_t dp;
+    uint32_t i;
+
+    printk("cpu%d cx acpi info:\n", cpu);
+    printk("\tcount = %d\n", power->count);
+    printk("\tflags: bm_cntl[%d], bm_chk[%d], has_cst[%d],\n"
+           "\t       pwr_setup_done[%d], bm_rld_set[%d]\n",
+           power->flags.bm_control, power->flags.bm_check, power->flags.has_cst,
+           power->flags.power_setup_done, power->flags.bm_rld_set);
+    
+    states = power->states;
+    
+    for ( i = 0; i < power->count; i++ )
+    {
+        if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) )
+            return;
+        
+        printk("\tstates[%d]:\n", i);
+        printk("\t\treg.space_id = 0x%x\n", state.reg.space_id);
+        printk("\t\treg.bit_width = 0x%x\n", state.reg.bit_width);
+        printk("\t\treg.bit_offset = 0x%x\n", state.reg.bit_offset);
+        printk("\t\treg.access_size = 0x%x\n", state.reg.access_size);
+        printk("\t\treg.address = 0x%"PRIx64"\n", state.reg.address);
+        printk("\t\ttype    = %d\n", state.type);
+        printk("\t\tlatency = %d\n", state.latency);
+        printk("\t\tpower   = %d\n", state.power);
+
+        csd = state.dp;
+        printk("\t\tdp(@0x%p)\n", csd.p);
+        
+        if ( csd.p != NULL )
+        {
+            if ( unlikely(copy_from_guest(&dp, csd, 1)) )
+                return;
+            printk("\t\t\tdomain = %d\n", dp.domain);
+            printk("\t\t\tcoord_type   = %d\n", dp.coord_type);
+            printk("\t\t\tnum = %d\n", dp.num);
+        }
+    }
+}
+#else
+#define print_cx_pminfo(c, p)
+#endif
+
+long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
+{
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states;
+    xen_processor_cx_t xen_cx;
+    struct acpi_processor_power *acpi_power;
+    int cpu_id, i;
+
+    if ( unlikely(!guest_handle_okay(power->states, power->count)) )
+        return -EFAULT;
+
+    print_cx_pminfo(cpu, power);
+
+    /* map from acpi_id to cpu_id */
+    cpu_id = get_cpu_id((u8)cpu);
+    if ( cpu_id == -1 )
+    {
+        printk(XENLOG_ERR "no cpu_id for acpi_id %d\n", cpu);
+        return -EFAULT;
+    }
+
+    acpi_power = &processor_powers[cpu_id];
+
+    init_cx_pminfo(acpi_power);
+
+    states = power->states;
+
+    for ( i = 0; i < power->count; i++ )
+    {
+        if ( unlikely(copy_from_guest_offset(&xen_cx, states, i, 1)) )
+            return -EFAULT;
+
+        set_cx(acpi_power, &xen_cx);
+    }
+
+    /* FIXME: C-state dependency is not supported by far */
+    
+    /* initialize default policy */
+    acpi_processor_set_power_policy(acpi_power);
+
+    print_acpi_power(cpu_id, acpi_power);
+
+    if ( cpu_id == 0 && pm_idle_save == NULL )
+    {
+        pm_idle_save = pm_idle;
+        pm_idle = acpi_processor_idle;
+    }
+        
+    return 0;
+}
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/domain.c     Thu May 01 10:40:01 2008 +0100
@@ -56,6 +56,9 @@ DEFINE_PER_CPU(u64, efer);
 DEFINE_PER_CPU(u64, efer);
 DEFINE_PER_CPU(unsigned long, cr4);
 
+static void default_idle(void);
+void (*pm_idle) (void) = default_idle;
+
 static void unmap_vcpu_info(struct vcpu *v);
 
 static void paravirt_ctxt_switch_from(struct vcpu *v);
@@ -105,7 +108,7 @@ void idle_loop(void)
         if ( cpu_is_offline(smp_processor_id()) )
             play_dead();
         page_scrub_schedule_work();
-        default_idle();
+        (*pm_idle)();
         do_softirq();
     }
 }
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/platform_hypercall.c Thu May 01 10:40:01 2008 +0100
@@ -44,6 +44,8 @@ extern spinlock_t xenpf_lock;
 
 static DEFINE_PER_CPU(uint64_t, freq);
 
+extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
+
 static long cpu_frequency_change_helper(void *data)
 {
     return cpu_frequency_change(this_cpu(freq));
@@ -340,6 +342,27 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
     }
     break;
 
+    case XENPF_set_processor_pminfo:
+        switch ( op->u.set_pminfo.type )
+        {
+        case XEN_PM_PX:
+            ret = -EINVAL;
+            break;
+            
+        case XEN_PM_CX:
+            ret = set_cx_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.power);
+            break;
+
+        case XEN_PM_TX:
+            ret = -EINVAL;
+            break;
+
+        default:
+            ret = -EINVAL;
+            break;
+        }
+        break;
+ 
     default:
         ret = -ENOSYS;
         break;
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/x86_64/Makefile
--- a/xen/arch/x86/x86_64/Makefile      Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/x86_64/Makefile      Thu May 01 10:40:01 2008 +0100
@@ -12,6 +12,7 @@ obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += domain.o
 obj-$(CONFIG_COMPAT) += physdev.o
 obj-$(CONFIG_COMPAT) += platform_hypercall.o
+obj-$(CONFIG_COMPAT) += cpu_idle.o
 
 ifeq ($(CONFIG_COMPAT),y)
 # extra dependencies
@@ -22,4 +23,5 @@ platform_hypercall.o: ../platform_hyperc
 platform_hypercall.o: ../platform_hypercall.c
 sysctl.o:      ../sysctl.c
 traps.o:       compat/traps.c
+cpu_idle.o:    ../acpi/cpu_idle.c
 endif
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/x86_64/cpu_idle.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/x86_64/cpu_idle.c    Thu May 01 10:40:01 2008 +0100
@@ -0,0 +1,128 @@
+/******************************************************************************
+ * cpu_idle.c -- adapt x86/acpi/cpu_idle.c to compat guest.
+ *
+ *  Copyright (C) 2007, 2008 Intel Corporation
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#define __XEN_TOOLS__ /* for using get_xen_guest_handle macro */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/xmalloc.h>
+#include <xen/guest_access.h>
+#include <compat/platform.h>
+
+CHECK_processor_csd;
+
+DEFINE_XEN_GUEST_HANDLE(compat_processor_csd_t);
+DEFINE_XEN_GUEST_HANDLE(compat_processor_cx_t);
+
+#define xlat_page_start COMPAT_ARG_XLAT_VIRT_START(current->vcpu_id)
+#define xlat_page_size  COMPAT_ARG_XLAT_SIZE
+#define xlat_page_left_size(xlat_page_current) \
+    (xlat_page_start + xlat_page_size - xlat_page_current)
+
+#define xlat_malloc_init(xlat_page_current)    do { \
+    xlat_page_current = xlat_page_start; \
+} while (0)
+
+static void *xlat_malloc(unsigned long *xlat_page_current, size_t size)
+{
+    void *ret;
+
+    /* normalize size to be 64 * n */
+    size = (size + 0x3fUL) & ~0x3fUL;
+
+    if ( unlikely(size > xlat_page_left_size(*xlat_page_current)) )
+        return NULL;
+
+    ret = (void *) *xlat_page_current;
+    *xlat_page_current += size;
+
+    return ret;
+}
+
+#define xlat_malloc_array(_p, _t, _c) ((_t *) xlat_malloc(&_p, sizeof(_t) * _c))
+
+static int copy_from_compat_state(xen_processor_cx_t *xen_state,
+                                  compat_processor_cx_t *state)
+{
+#define XLAT_processor_cx_HNDL_dp(_d_, _s_) do { \
+    XEN_GUEST_HANDLE(compat_processor_csd_t) dps; \
+    if ( unlikely(!compat_handle_okay((_s_)->dp, (_s_)->dpcnt)) ) \
+            return -EFAULT; \
+    guest_from_compat_handle(dps, (_s_)->dp); \
+    (_d_)->dp = guest_handle_cast(dps, xen_processor_csd_t); \
+} while (0)
+    XLAT_processor_cx(xen_state, state);
+#undef XLAT_processor_cx_HNDL_dp
+
+    return 0;
+}
+
+extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
+
+long compat_set_cx_pminfo(uint32_t cpu, struct compat_processor_power *power)
+{
+    struct xen_processor_power *xen_power;
+    unsigned long xlat_page_current;
+
+    xlat_malloc_init(xlat_page_current);
+
+    xen_power = xlat_malloc_array(xlat_page_current,
+                                  struct xen_processor_power, 1);
+    if ( unlikely(xen_power == NULL) )
+       return -EFAULT;
+
+#define XLAT_processor_power_HNDL_states(_d_, _s_) do { \
+    xen_processor_cx_t *xen_states = NULL; \
+\
+    if ( likely((_s_)->count > 0) ) \
+    { \
+        XEN_GUEST_HANDLE(compat_processor_cx_t) states; \
+        compat_processor_cx_t state; \
+        int i; \
+\
+        xen_states = xlat_malloc_array(xlat_page_current, \
+                                       xen_processor_cx_t, (_s_)->count); \
+        if ( unlikely(xen_states == NULL) ) \
+            return -EFAULT; \
+\
+        if ( unlikely(!compat_handle_okay((_s_)->states, (_s_)->count)) ) \
+            return -EFAULT; \
+        guest_from_compat_handle(states, (_s_)->states); \
+\
+        for ( i = 0; i < _s_->count; i++ ) \
+        { \
+           if ( unlikely(copy_from_guest_offset(&state, states, i, 1)) ) \
+               return -EFAULT; \
+           if ( unlikely(copy_from_compat_state(&xen_states[i], &state)) ) \
+               return -EFAULT; \
+        } \
+    } \
+\
+    set_xen_guest_handle((_d_)->states, xen_states); \
+} while (0)
+    XLAT_processor_power(xen_power, power);
+#undef XLAT_processor_power_HNDL_states
+
+    return set_cx_pminfo(cpu, xen_power);
+}
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/arch/x86/x86_64/platform_hypercall.c
--- a/xen/arch/x86/x86_64/platform_hypercall.c  Thu May 01 10:33:03 2008 +0100
+++ b/xen/arch/x86/x86_64/platform_hypercall.c  Thu May 01 10:40:01 2008 +0100
@@ -10,6 +10,10 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_
 #define xen_platform_op     compat_platform_op
 #define xen_platform_op_t   compat_platform_op_t
 #define do_platform_op(x)   compat_platform_op(_##x)
+
+#define xen_processor_power     compat_processor_power
+#define xen_processor_power_t   compat_processor_power_t
+#define set_cx_pminfo           compat_set_cx_pminfo
 
 #define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep
 
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/include/public/platform.h
--- a/xen/include/public/platform.h     Thu May 01 10:33:03 2008 +0100
+++ b/xen/include/public/platform.h     Thu May 01 10:40:01 2008 +0100
@@ -199,6 +199,70 @@ typedef struct xenpf_getidletime xenpf_g
 typedef struct xenpf_getidletime xenpf_getidletime_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
 
+#define XENPF_set_processor_pminfo      54
+
+/* ability bits */
+#define XEN_PROCESSOR_PM_CX    1
+#define XEN_PROCESSOR_PM_PX    2
+#define XEN_PROCESSOR_PM_TX    4
+
+/* cmd type */
+#define XEN_PM_CX   0
+#define XEN_PM_PX   1
+#define XEN_PM_TX   2
+
+struct xen_power_register {
+    uint32_t     space_id;
+    uint32_t     bit_width;
+    uint32_t     bit_offset;
+    uint32_t     access_size;
+    uint64_t     address;
+};
+
+struct xen_processor_csd {
+    uint32_t    domain;      /* domain number of one dependent group */
+    uint32_t    coord_type;  /* coordination type */
+    uint32_t    num;         /* number of processors in same domain */
+};
+typedef struct xen_processor_csd xen_processor_csd_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t);
+
+struct xen_processor_cx {
+    struct xen_power_register  reg; /* GAS for Cx trigger register */
+    uint8_t     type;     /* cstate value, c0: 0, c1: 1, ... */
+    uint32_t    latency;  /* worst latency (ms) to enter/exit this cstate */
+    uint32_t    power;    /* average power consumption(mW) */
+    uint32_t    dpcnt;    /* number of dependency entries */
+    XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
+};
+typedef struct xen_processor_cx xen_processor_cx_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
+
+struct xen_processor_flags {
+    uint32_t bm_control:1;
+    uint32_t bm_check:1;
+    uint32_t has_cst:1;
+    uint32_t power_setup_done:1;
+    uint32_t bm_rld_set:1;
+};
+
+struct xen_processor_power {
+    uint32_t count;  /* number of C state entries in array below */
+    struct xen_processor_flags flags;  /* global flags of this processor */
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */
+};
+
+struct xenpf_set_processor_pminfo {
+    /* IN variables */
+    uint32_t id;    /* ACPI CPU ID */
+    uint32_t type;  /* {XEN_PM_CX, ...} */
+    union {
+        struct xen_processor_power          power;/* Cx: _CST/_CSD */
+    };
+};
+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
+
 struct xen_platform_op {
     uint32_t cmd;
     uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -213,6 +277,7 @@ struct xen_platform_op {
         struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
         struct xenpf_change_freq       change_freq;
         struct xenpf_getidletime       getidletime;
+        struct xenpf_set_processor_pminfo set_pminfo;
         uint8_t                        pad[128];
     } u;
 };
diff -r ad55c06c9bbc -r 5bb9093eb0e9 xen/include/xlat.lst
--- a/xen/include/xlat.lst      Thu May 01 10:33:03 2008 +0100
+++ b/xen/include/xlat.lst      Thu May 01 10:40:01 2008 +0100
@@ -44,3 +44,8 @@
 !      vcpu_runstate_info              vcpu.h
 ?      xenoprof_init                   xenoprof.h
 ?      xenoprof_passive                xenoprof.h
+!      power_register                  platform.h
+?      processor_csd                   platform.h
+!      processor_cx                    platform.h
+!      processor_flags                 platform.h
+!      processor_power                 platform.h
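
A note on the idle governor for readers skimming the diff above:
acpi_processor_idle() implements the Linux "ladder" policy, where each C-state
keeps a promotion and a demotion counter, and the CPU climbs one rung deeper
after enough consecutive long residencies or drops one rung after enough short
ones. The fragment below condenses that decision into a standalone sketch with
illustrative names; it is not the hypervisor code, which additionally handles
PM-timer reads, interrupt masking and the max_cstate clamp.

    /*
     * Simplified sketch of the ladder decision in acpi_processor_idle():
     * promote after enough consecutive "long" sleeps, demote after enough
     * consecutive "short" ones.  Standalone illustration only.
     */
    struct ladder_rung {
        struct ladder_rung *promote, *demote;       /* neighbouring rungs */
        unsigned int promote_ticks, promote_count_thresh, promote_count;
        unsigned int demote_ticks, demote_count_thresh, demote_count;
    };

    static struct ladder_rung *ladder_next(struct ladder_rung *cur,
                                           unsigned int sleep_ticks)
    {
        if ( cur->promote && sleep_ticks > cur->promote_ticks )
        {
            cur->demote_count = 0;
            if ( ++cur->promote_count >= cur->promote_count_thresh )
                return cur->promote;                /* deeper C-state */
        }
        else if ( cur->demote && sleep_ticks < cur->demote_ticks )
        {
            cur->promote_count = 0;
            if ( ++cur->demote_count >= cur->demote_count_thresh )
                return cur->demote;                 /* shallower C-state */
        }
        return cur;                                 /* stay on this rung */
    }

In the patch, acpi_processor_set_power_policy() seeds the thresholds so that a
single short sleep is enough to demote (demotion.threshold.count = 1), while
promotion from C1 needs 10 consecutive long sleeps and promotion from C2
upward needs 4, and the chosen state is always clamped to max_cstate (default
2, tunable via the max_cstate= boot parameter).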

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
