To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] X86 and IA64: Rebase cpufreq logic for supporting both x86 and ia64
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Tue, 30 Sep 2008 08:00:07 -0700
Delivery-date: Tue, 30 Sep 2008 08:00:48 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1222434278 -3600
# Node ID 08374be213188c10eb7c170c143ca0d0d17e55d8
# Parent  5274aa966231ad5f050987e1fb00c6f6ee75a007
X86 and IA64: Rebase cpufreq logic to support both the x86 and ia64 architectures

Rebase the cpufreq logic to support both the x86 and ia64 architectures:
1. move arch-independent cpufreq logic into the common directories
   (xen/drivers/acpi and xen/drivers/cpufreq);
2. leave x86-dependent cpufreq logic in xen/arch/x86/acpi/cpufreq.

Signed-off-by: Yu, Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
---
 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c |  246 ------------------
 xen/arch/x86/acpi/cpufreq/utility.c          |  368 ---------------------------
 xen/arch/x86/acpi/pmstat.c                   |  155 -----------
 xen/arch/x86/acpi/Makefile                   |    1 
 xen/arch/x86/acpi/cpufreq/Makefile           |    2 
 xen/arch/x86/acpi/cpufreq/cpufreq.c          |  153 -----------
 xen/drivers/Makefile                         |    1 
 xen/drivers/acpi/Makefile                    |    1 
 xen/drivers/acpi/pmstat.c                    |  155 +++++++++++
 xen/drivers/cpufreq/Makefile                 |    3 
 xen/drivers/cpufreq/cpufreq.c                |  188 +++++++++++++
 xen/drivers/cpufreq/cpufreq_ondemand.c       |  246 ++++++++++++++++++
 xen/drivers/cpufreq/utility.c                |  368 +++++++++++++++++++++++++++
 xen/include/acpi/cpufreq/cpufreq.h           |    7 
 xen/include/acpi/cpufreq/processor_perf.h    |   10 
 15 files changed, 976 insertions(+), 928 deletions(-)

diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/Makefile
--- a/xen/arch/x86/acpi/Makefile        Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/Makefile        Fri Sep 26 14:04:38 2008 +0100
@@ -2,4 +2,3 @@ subdir-y += cpufreq
 
 obj-y += boot.o
 obj-y += power.o suspend.o wakeup_prot.o cpu_idle.o cpuidle_menu.o
-obj-y += pmstat.o
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/Makefile
--- a/xen/arch/x86/acpi/cpufreq/Makefile        Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/Makefile        Fri Sep 26 14:04:38 2008 +0100
@@ -1,4 +1,2 @@ obj-y += cpufreq.o
 obj-y += cpufreq.o
-obj-y += utility.o
-obj-y += cpufreq_ondemand.o
 obj-y += powernow.o
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c       Fri Sep 26 14:04:38 2008 +0100
@@ -45,9 +45,6 @@
 #include <acpi/acpi.h>
 #include <acpi/cpufreq/cpufreq.h>
 
-/* TODO: change to link list later as domain number may be sparse */
-static cpumask_t cpufreq_dom_map[NR_CPUS];
-
 enum {
     UNDEFINED_CAPABLE = 0,
     SYSTEM_INTEL_MSR_CAPABLE,
@@ -56,13 +53,6 @@ enum {
 
 #define INTEL_MSR_RANGE         (0xffff)
 #define CPUID_6_ECX_APERFMPERF_CAPABILITY       (0x1)
-
-struct acpi_cpufreq_data {
-    struct processor_performance *acpi_data;
-    struct cpufreq_frequency_table *freq_table;
-    unsigned int max_freq;
-    unsigned int cpu_feature;
-};
 
 static struct acpi_cpufreq_data *drv_data[NR_CPUS];
 
@@ -342,7 +332,7 @@ static int acpi_cpufreq_target(struct cp
             policy->resume = 0;
         }
         else {
-            printk(KERN_INFO "Already at target state (P%d)\n", 
+            printk(KERN_DEBUG "Already at target state (P%d)\n", 
                 next_perf_state);
             return 0;
         }
@@ -379,7 +369,7 @@ static int acpi_cpufreq_target(struct cp
     if (!check_freqs(cmd.mask, freqs.new, data))
         return -EAGAIN;
 
-    px_statistic_update(cmd.mask, perf->state, next_perf_state);
+    cpufreq_statistic_update(cmd.mask, perf->state, next_perf_state);
 
     perf->state = next_perf_state;
     policy->cur = freqs.new;
@@ -581,145 +571,6 @@ static struct cpufreq_driver acpi_cpufre
     .exit   = acpi_cpufreq_cpu_exit,
 };
 
-int cpufreq_limit_change(unsigned int cpu)
-{
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
-    struct cpufreq_policy policy;
-
-    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
-        return -ENODEV;
-
-    if ((perf->platform_limit < 0) || 
-        (perf->platform_limit >= perf->state_count))
-        return -EINVAL;
-
-    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
-
-    policy.max =
-        perf->states[perf->platform_limit].core_frequency * 1000;
-
-    return __cpufreq_set_policy(data, &policy);
-}
-
-int cpufreq_add_cpu(unsigned int cpu)
-{
-    int ret = 0;
-    unsigned int firstcpu;
-    unsigned int dom;
-    unsigned int j;
-    struct cpufreq_policy new_policy;
-    struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-
-    /* to protect the case when Px was not controlled by xen */
-    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
-        return 0;
-
-    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
-        return -EINVAL;
-
-    ret = px_statistic_init(cpu);
-    if (ret)
-        return ret;
-
-    dom = perf->domain_info.domain;
-    if (cpus_weight(cpufreq_dom_map[dom])) {
-        /* share policy with the first cpu since on same boat */
-        firstcpu = first_cpu(cpufreq_dom_map[dom]);
-        policy = cpufreq_cpu_policy[firstcpu];
-
-        cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom_map[dom]);
-        cpu_set(cpu, policy->cpus);
-
-        printk(KERN_EMERG"adding CPU %u\n", cpu);
-    } else {
-        /* for the first cpu, setup policy and do init work */
-        policy = xmalloc(struct cpufreq_policy);
-        if (!policy) {
-            px_statistic_exit(cpu);
-            return -ENOMEM;
-        }
-        memset(policy, 0, sizeof(struct cpufreq_policy));
-
-        cpufreq_cpu_policy[cpu] = policy;
-        cpu_set(cpu, cpufreq_dom_map[dom]);
-        cpu_set(cpu, policy->cpus);
-
-        policy->cpu = cpu;
-        ret = cpufreq_driver->init(policy);
-        if (ret)
-            goto err1;
-        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
-    }
-
-    /*
-     * After get full cpumap of the coordination domain,
-     * we can safely start gov here.
-     */
-    if (cpus_weight(cpufreq_dom_map[dom]) ==
-        perf->domain_info.num_processors) {
-        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
-        policy->governor = NULL;
-        ret = __cpufreq_set_policy(policy, &new_policy);
-        if (ret)
-            goto err2;
-    }
-
-    return 0;
-
-err2:
-    cpufreq_driver->exit(policy);
-err1:
-    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
-        cpufreq_cpu_policy[j] = NULL;
-        px_statistic_exit(j);
-    }
-
-    cpus_clear(cpufreq_dom_map[dom]);
-    xfree(policy);
-    return ret;
-}
-
-int cpufreq_del_cpu(unsigned int cpu)
-{
-    unsigned int dom;
-    struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
-
-    /* to protect the case when Px was not controlled by xen */
-    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
-        return 0;
-
-    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
-        return -EINVAL;
-
-    dom = perf->domain_info.domain;
-    policy = cpufreq_cpu_policy[cpu];
-
-    printk(KERN_EMERG"deleting CPU %u\n", cpu);
-
-    /* for the first cpu of the domain, stop gov */
-    if (cpus_weight(cpufreq_dom_map[dom]) ==
-        perf->domain_info.num_processors)
-        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
-
-    cpufreq_cpu_policy[cpu] = NULL;
-    cpu_clear(cpu, policy->cpus);
-    cpu_clear(cpu, cpufreq_dom_map[dom]);
-    px_statistic_exit(cpu);
-
-    /* for the last cpu of the domain, clean room */
-    /* It's safe here to free freq_table, drv_data and policy */
-    if (!cpus_weight(cpufreq_dom_map[dom])) {
-        cpufreq_driver->exit(policy);
-        xfree(policy);
-    }
-
-    return 0;
-}
-
 static int __init cpufreq_driver_init(void)
 {
     int ret = 0;
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c      Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,246 +0,0 @@
-/*
- *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
- *
- *  Copyright (C)  2001 Russell King
- *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>.
- *                      Jun Nakajima <jun.nakajima@xxxxxxxxx>
- *             Feb 2008 Liu Jinsong <jinsong.liu@xxxxxxxxx>
- *             Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <xen/types.h>
-#include <xen/percpu.h>
-#include <xen/cpumask.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <xen/timer.h>
-#include <asm/config.h>
-#include <acpi/cpufreq/cpufreq.h>
-
-#define DEF_FREQUENCY_UP_THRESHOLD              (80)
-
-#define MIN_DBS_INTERVAL                        (MICROSECS(100))
-#define MIN_SAMPLING_MILLISECS                  (20)
-#define MIN_STAT_SAMPLING_RATE                   \
-    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
-#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
-#define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
-
-static uint64_t def_sampling_rate;
-
-/* Sampling types */
-enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
-
-static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
-
-static unsigned int dbs_enable;    /* number of CPUs using this policy */
-
-static struct dbs_tuners {
-    uint64_t     sampling_rate;
-    unsigned int up_threshold;
-    unsigned int ignore_nice;
-    unsigned int powersave_bias;
-} dbs_tuners_ins = {
-    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
-    .ignore_nice = 0,
-    .powersave_bias = 0,
-};
-
-static struct timer dbs_timer[NR_CPUS];
-
-uint64_t get_cpu_idle_time(unsigned int cpu)
-{
-    uint64_t idle_ns;
-    struct vcpu *v;
-
-    if ((v = idle_vcpu[cpu]) == NULL)
-        return 0;
-
-    idle_ns = v->runstate.time[RUNSTATE_running];
-    if (v->is_running)
-        idle_ns += NOW() - v->runstate.state_entry_time;
-
-    return idle_ns;
-}
-
-static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
-{
-    unsigned int load = 0;
-    uint64_t cur_ns, idle_ns, total_ns;
-
-    struct cpufreq_policy *policy;
-    unsigned int j;
-
-    if (!this_dbs_info->enable)
-        return;
-
-    policy = this_dbs_info->cur_policy;
-
-    if (unlikely(policy->resume)) {
-        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
-        return;
-    }
-
-    cur_ns = NOW();
-    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
-    this_dbs_info->prev_cpu_wall = NOW();
-
-    if (total_ns < MIN_DBS_INTERVAL)
-        return;
-
-    /* Get Idle Time */
-    idle_ns = UINT_MAX;
-    for_each_cpu_mask(j, policy->cpus) {
-        uint64_t total_idle_ns;
-        unsigned int tmp_idle_ns;
-        struct cpu_dbs_info_s *j_dbs_info;
-
-        j_dbs_info = &per_cpu(cpu_dbs_info, j);
-        total_idle_ns = get_cpu_idle_time(j);
-        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
-        j_dbs_info->prev_cpu_idle = total_idle_ns;
-
-        if (tmp_idle_ns < idle_ns)
-            idle_ns = tmp_idle_ns;
-    }
-
-    if (likely(total_ns > idle_ns))
-        load = (100 * (total_ns - idle_ns)) / total_ns;
-
-    /* Check for frequency increase */
-    if (load > dbs_tuners_ins.up_threshold) {
-        /* if we are already at full speed then break out early */
-        if (policy->cur == policy->max)
-            return;
-        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
-        return;
-    }
-
-    /* Check for frequency decrease */
-    /* if we cannot reduce the frequency anymore, break out early */
-    if (policy->cur == policy->min)
-        return;
-
-    /*
-     * The optimal frequency is the frequency that is the lowest that
-     * can support the current CPU usage without triggering the up
-     * policy. To be safe, we focus 10 points under the threshold.
-     */
-    if (load < (dbs_tuners_ins.up_threshold - 10)) {
-        unsigned int freq_next, freq_cur;
-
-        freq_cur = __cpufreq_driver_getavg(policy);
-        if (!freq_cur)
-            freq_cur = policy->cur;
-
-        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
-
-        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
-    }
-}
-
-static void do_dbs_timer(void *dbs)
-{
-    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
-
-    if (!dbs_info->enable)
-        return;
-
-    dbs_check_cpu(dbs_info);
-
-    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
-}
-
-static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
-{
-    dbs_info->enable = 1;
-
-    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 
-        (void *)dbs_info, dbs_info->cpu);
-
-    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
-}
-
-static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
-{
-    dbs_info->enable = 0;
-    stop_timer(&dbs_timer[dbs_info->cpu]);
-}
-
-int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
-{
-    unsigned int cpu = policy->cpu;
-    struct cpu_dbs_info_s *this_dbs_info;
-    unsigned int j;
-
-    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
-
-    switch (event) {
-    case CPUFREQ_GOV_START:
-        if ((!cpu_online(cpu)) || (!policy->cur))
-            return -EINVAL;
-
-        if (policy->cpuinfo.transition_latency >
-            (TRANSITION_LATENCY_LIMIT * 1000)) {
-            printk(KERN_WARNING "ondemand governor failed to load "
-                "due to too long transition latency\n");
-            return -EINVAL;
-        }
-        if (this_dbs_info->enable)
-            /* Already enabled */
-            break;
-
-        dbs_enable++;
-
-        for_each_cpu_mask(j, policy->cpus) {
-            struct cpu_dbs_info_s *j_dbs_info;
-            j_dbs_info = &per_cpu(cpu_dbs_info, j);
-            j_dbs_info->cur_policy = policy;
-
-            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
-            j_dbs_info->prev_cpu_wall = NOW();
-        }
-        this_dbs_info->cpu = cpu;
-        /*
-         * Start the timerschedule work, when this governor
-         * is used for first time
-         */
-        if (dbs_enable == 1) {
-            def_sampling_rate = policy->cpuinfo.transition_latency *
-                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
-
-            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
-                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
-
-            dbs_tuners_ins.sampling_rate = def_sampling_rate;
-        }
-        dbs_timer_init(this_dbs_info);
-
-        break;
-
-    case CPUFREQ_GOV_STOP:
-        dbs_timer_exit(this_dbs_info);
-        dbs_enable--;
-
-        break;
-
-    case CPUFREQ_GOV_LIMITS:
-        if (policy->max < this_dbs_info->cur_policy->cur)
-            __cpufreq_driver_target(this_dbs_info->cur_policy,
-                policy->max, CPUFREQ_RELATION_H);
-        else if (policy->min > this_dbs_info->cur_policy->cur)
-            __cpufreq_driver_target(this_dbs_info->cur_policy,
-                policy->min, CPUFREQ_RELATION_L);
-        break;
-    }
-    return 0;
-}
-
-struct cpufreq_governor cpufreq_gov_dbs = {
-    .name = "ondemand",
-    .governor = cpufreq_governor_dbs,
-};
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/cpufreq/utility.c
--- a/xen/arch/x86/acpi/cpufreq/utility.c       Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,368 +0,0 @@
-/*
- *  utility.c - misc functions for cpufreq driver and Px statistic
- *
- *  Copyright (C) 2001 Russell King
- *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
- *
- *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
- *    Added handling for CPU hotplug
- *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
- *    Fix handling for CPU hotplug -- affected CPUs
- *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
- *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
- *    And poring to Xen hypervisor
- *    2. some Px statistic interface funcdtions
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <xen/errno.h>
-#include <xen/cpumask.h>
-#include <xen/types.h>
-#include <xen/spinlock.h>
-#include <xen/percpu.h>
-#include <xen/types.h>
-#include <xen/sched.h>
-#include <xen/timer.h>
-#include <asm/config.h>
-#include <acpi/cpufreq/cpufreq.h>
-#include <public/sysctl.h>
-
-struct cpufreq_driver   *cpufreq_driver;
-struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
-struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
-
-/*********************************************************************
- *                    Px STATISTIC INFO                              *
- *********************************************************************/
-
-void px_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
-{
-    uint32_t i;
-    uint64_t now;
-
-    now = NOW();
-
-    for_each_cpu_mask(i, cpumask) {
-        struct pm_px *pxpt = px_statistic_data[i];
-        struct processor_pminfo *pmpt = processor_pminfo[i];
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
-
-        if ( !pxpt || !pmpt )
-            continue;
-
-        total_idle_ns = get_cpu_idle_time(i);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        pxpt->u.last = from;
-        pxpt->u.cur = to;
-        pxpt->u.pt[to].count++;
-        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
-        pxpt->u.pt[from].residency -= tmp_idle_ns;
-
-        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
-
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = total_idle_ns;
-    }
-}
-
-int px_statistic_init(unsigned int cpuid)
-{
-    uint32_t i, count;
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
-
-    count = pmpt->perf.state_count;
-
-    if ( !pmpt )
-        return -EINVAL;
-
-    if ( !pxpt )
-    {
-        pxpt = xmalloc(struct pm_px);
-        if ( !pxpt )
-            return -ENOMEM;
-        memset(pxpt, 0, sizeof(*pxpt));
-        px_statistic_data[cpuid] = pxpt;
-    }
-
-    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
-    if (!pxpt->u.trans_pt)
-        return -ENOMEM;
-
-    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
-    if (!pxpt->u.pt) {
-        xfree(pxpt->u.trans_pt);
-        return -ENOMEM;
-    }
-
-    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
-    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
-
-    pxpt->u.total = pmpt->perf.state_count;
-    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
-
-    for (i=0; i < pmpt->perf.state_count; i++)
-        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
-
-    pxpt->prev_state_wall = NOW();
-    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
-
-    return 0;
-}
-
-void px_statistic_exit(unsigned int cpuid)
-{
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-
-    if (!pxpt)
-        return;
-    xfree(pxpt->u.trans_pt);
-    xfree(pxpt->u.pt);
-    memset(pxpt, 0, sizeof(struct pm_px));
-}
-
-void px_statistic_reset(unsigned int cpuid)
-{
-    uint32_t i, j, count;
-    struct pm_px *pxpt = px_statistic_data[cpuid];
-    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
-
-    if ( !pxpt || !pmpt )
-        return;
-
-    count = pmpt->perf.state_count;
-
-    for (i=0; i < count; i++) {
-        pxpt->u.pt[i].residency = 0;
-        pxpt->u.pt[i].count = 0;
-
-        for (j=0; j < count; j++)
-            *(pxpt->u.trans_pt + i*count + j) = 0;
-    }
-
-    pxpt->prev_state_wall = NOW();
-    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
-}
-
-
-/*********************************************************************
- *                   FREQUENCY TABLE HELPERS                         *
- *********************************************************************/
-
-int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
-                                    struct cpufreq_frequency_table *table)
-{
-    unsigned int min_freq = ~0;
-    unsigned int max_freq = 0;
-    unsigned int i;
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if (freq < min_freq)
-            min_freq = freq;
-        if (freq > max_freq)
-            max_freq = freq;
-    }
-
-    policy->min = policy->cpuinfo.min_freq = min_freq;
-    policy->max = policy->cpuinfo.max_freq = max_freq;
-
-    if (policy->min == ~0)
-        return -EINVAL;
-    else
-        return 0;
-}
-
-int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table)
-{
-    unsigned int next_larger = ~0;
-    unsigned int i;
-    unsigned int count = 0;
-
-    if (!cpu_online(policy->cpu))
-        return -EINVAL;
-
-    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-                                 policy->cpuinfo.max_freq);
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if ((freq >= policy->min) && (freq <= policy->max))
-            count++;
-        else if ((next_larger > freq) && (freq > policy->max))
-            next_larger = freq;
-    }
-
-    if (!count)
-        policy->max = next_larger;
-
-    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
-                                 policy->cpuinfo.max_freq);
-
-    return 0;
-}
-
-int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
-                                   struct cpufreq_frequency_table *table,
-                                   unsigned int target_freq,
-                                   unsigned int relation,
-                                   unsigned int *index)
-{
-    struct cpufreq_frequency_table optimal = {
-        .index = ~0,
-        .frequency = 0,
-    };
-    struct cpufreq_frequency_table suboptimal = {
-        .index = ~0,
-        .frequency = 0,
-    };
-    unsigned int i;
-
-    switch (relation) {
-    case CPUFREQ_RELATION_H:
-        suboptimal.frequency = ~0;
-        break;
-    case CPUFREQ_RELATION_L:
-        optimal.frequency = ~0;
-        break;
-    }
-
-    if (!cpu_online(policy->cpu))
-        return -EINVAL;
-
-    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
-        unsigned int freq = table[i].frequency;
-        if (freq == CPUFREQ_ENTRY_INVALID)
-            continue;
-        if ((freq < policy->min) || (freq > policy->max))
-            continue;
-        switch(relation) {
-        case CPUFREQ_RELATION_H:
-            if (freq <= target_freq) {
-                if (freq >= optimal.frequency) {
-                    optimal.frequency = freq;
-                    optimal.index = i;
-                }
-            } else {
-                if (freq <= suboptimal.frequency) {
-                    suboptimal.frequency = freq;
-                    suboptimal.index = i;
-                }
-            }
-            break;
-        case CPUFREQ_RELATION_L:
-            if (freq >= target_freq) {
-                if (freq <= optimal.frequency) {
-                    optimal.frequency = freq;
-                    optimal.index = i;
-                }
-            } else {
-                if (freq >= suboptimal.frequency) {
-                    suboptimal.frequency = freq;
-                    suboptimal.index = i;
-                }
-            }
-            break;
-        }
-    }
-    if (optimal.index > i) {
-        if (suboptimal.index > i)
-            return -EINVAL;
-        *index = suboptimal.index;
-    } else
-        *index = optimal.index;
-
-    return 0;
-}
-
-
-/*********************************************************************
- *               GOVERNORS                                           *
- *********************************************************************/
-
-int __cpufreq_driver_target(struct cpufreq_policy *policy,
-                            unsigned int target_freq,
-                            unsigned int relation)
-{
-    int retval = -EINVAL;
-
-    if (cpu_online(policy->cpu) && cpufreq_driver->target)
-        retval = cpufreq_driver->target(policy, target_freq, relation);
-
-    return retval;
-}
-
-int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
-{
-    int ret = 0;
-
-    if (!policy)
-        return -EINVAL;
-
-    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
-        ret = cpufreq_driver->getavg(policy->cpu);
-
-    return ret;
-}
-
-
-/*********************************************************************
- *                 POLICY                                            *
- *********************************************************************/
-
-/*
- * data   : current policy.
- * policy : policy to be set.
- */
-int __cpufreq_set_policy(struct cpufreq_policy *data,
-                                struct cpufreq_policy *policy)
-{
-    int ret = 0;
-
-    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
-
-    if (policy->min > data->min && policy->min > policy->max)
-        return -EINVAL;
-
-    /* verify the cpu speed can be set within this limit */
-    ret = cpufreq_driver->verify(policy);
-    if (ret)
-        return ret;
-
-    data->min = policy->min;
-    data->max = policy->max;
-
-    if (policy->governor != data->governor) {
-        /* save old, working values */
-        struct cpufreq_governor *old_gov = data->governor;
-
-        /* end old governor */
-        if (data->governor)
-            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
-
-        /* start new governor */
-        data->governor = policy->governor;
-        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
-            /* new governor failed, so re-start old one */
-            if (old_gov) {
-                data->governor = old_gov;
-                __cpufreq_governor(data, CPUFREQ_GOV_START);
-            }
-            return -EINVAL;
-        }
-        /* might be a policy change, too, so fall through */
-    }
-
-    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
-}
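
For reference, cpufreq_frequency_table_target() above chooses a table index
for a requested frequency: CPUFREQ_RELATION_H rounds down to the highest
entry not above the target, CPUFREQ_RELATION_L rounds up to the lowest entry
not below it, keeping a "suboptimal" entry from the other side as a fallback.
A minimal standalone sketch of the same selection rule (table values are
invented, not part of this changeset):

    /* Standalone sketch of the RELATION_H / RELATION_L lookup in
     * cpufreq_frequency_table_target(). Table values are invented. */
    #include <stdio.h>

    #define TABLE_END  (~0u)
    enum { RELATION_H, RELATION_L };

    static unsigned int pick(const unsigned int *freqs, unsigned int target,
                             int relation)
    {
        unsigned int best = (relation == RELATION_H) ? 0 : ~0u;
        unsigned int i, f;
        int found = 0;

        for (i = 0; (f = freqs[i]) != TABLE_END; i++) {
            if (relation == RELATION_H ? (f <= target && f >= best)
                                       : (f >= target && f <= best)) {
                best = f;
                found = 1;
            }
        }
        return found ? best : 0; /* real code then tries the other side */
    }

    int main(void)
    {
        const unsigned int freqs[] = { 2400000, 2000000, 1600000, TABLE_END };

        printf("H @ 1800000 -> %u kHz\n", pick(freqs, 1800000, RELATION_H));
        printf("L @ 1800000 -> %u kHz\n", pick(freqs, 1800000, RELATION_L));
        return 0;
    }

Here RELATION_H yields 1600000 kHz and RELATION_L yields 2000000 kHz for the
same 1800000 kHz target.
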
diff -r 5274aa966231 -r 08374be21318 xen/arch/x86/acpi/pmstat.c
--- a/xen/arch/x86/acpi/pmstat.c        Fri Sep 26 11:12:29 2008 +0100
+++ /dev/null   Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-/*****************************************************************************
-#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
-#
-#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx>
-#
-# This program is free software; you can redistribute it and/or modify it 
-# under the terms of the GNU General Public License as published by the Free 
-# Software Foundation; either version 2 of the License, or (at your option) 
-# any later version.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT 
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
-# more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc., 59 
-# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-#
-# The full GNU General Public License is included in this distribution in the
-# file called LICENSE.
-#
-*****************************************************************************/
-
-#include <xen/config.h>
-#include <xen/lib.h>
-#include <xen/errno.h>
-#include <xen/sched.h>
-#include <xen/event.h>
-#include <xen/irq.h>
-#include <xen/iocap.h>
-#include <xen/compat.h>
-#include <xen/guest_access.h>
-#include <asm/current.h>
-#include <public/xen.h>
-#include <xen/cpumask.h>
-#include <asm/processor.h>
-#include <xen/percpu.h>
-#include <xen/domain.h>
-
-#include <public/sysctl.h>
-#include <acpi/cpufreq/cpufreq.h>
-
-struct pm_px *__read_mostly px_statistic_data[NR_CPUS];
-
-extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
-extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
-extern int pmstat_reset_cx_stat(uint32_t cpuid);
-
-int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
-{
-    int ret = 0;
-    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
-
-    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
-        return -EINVAL;
-
-    switch ( op->type & PMSTAT_CATEGORY_MASK )
-    {
-    case PMSTAT_CX:
-        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
-            return -ENODEV;
-        break;
-    case PMSTAT_PX:
-        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
-            return -ENODEV;
-        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
-            return -EINVAL;
-        break;
-    default:
-        return -ENODEV;
-    }
-
-    switch ( op->type )
-    {
-    case PMSTAT_get_max_px:
-    {
-        op->u.getpx.total = pmpt->perf.state_count;
-        break;
-    }
-
-    case PMSTAT_get_pxstat:
-    {
-        uint64_t now, ct;
-        uint64_t total_idle_ns;
-        uint64_t tmp_idle_ns;
-        struct pm_px *pxpt = px_statistic_data[op->cpuid];
-
-        if ( !pxpt )
-            return -ENODATA;
-
-        total_idle_ns = get_cpu_idle_time(op->cpuid);
-        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
-
-        now = NOW();
-        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
-        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
-        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
-        pxpt->prev_state_wall = now;
-        pxpt->prev_idle_wall = total_idle_ns;
-
-        ct = pmpt->perf.state_count;
-        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
-        {
-            ret = -EFAULT;
-            break;
-        }
-
-        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
-        {
-            ret = -EFAULT;
-            break;
-        }
-
-        op->u.getpx.total = pxpt->u.total;
-        op->u.getpx.usable = pxpt->u.usable;
-        op->u.getpx.last = pxpt->u.last;
-        op->u.getpx.cur = pxpt->u.cur;
-
-        break;
-    }
-
-    case PMSTAT_reset_pxstat:
-    {
-        px_statistic_reset(op->cpuid);
-        break;
-    }
-
-    case PMSTAT_get_max_cx:
-    {
-        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
-        ret = 0;
-        break;
-    }
-
-    case PMSTAT_get_cxstat:
-    {
-        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
-        break;
-    }
-
-    case PMSTAT_reset_cxstat:
-    {
-        ret = pmstat_reset_cx_stat(op->cpuid);
-        break;
-    }
-
-    default:
-        printk("not defined sub-hypercall @ do_get_pm_info\n");
-        ret = -ENOSYS;
-        break;
-    }
-
-    return ret;
-}
diff -r 5274aa966231 -r 08374be21318 xen/drivers/Makefile
--- a/xen/drivers/Makefile      Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/drivers/Makefile      Fri Sep 26 14:04:38 2008 +0100
@@ -1,4 +1,5 @@ subdir-y += char
 subdir-y += char
+subdir-y += cpufreq
 subdir-$(x86) += passthrough
 subdir-$(HAS_ACPI) += acpi
 subdir-$(HAS_VGA) += video
diff -r 5274aa966231 -r 08374be21318 xen/drivers/acpi/Makefile
--- a/xen/drivers/acpi/Makefile Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/drivers/acpi/Makefile Fri Sep 26 14:04:38 2008 +0100
@@ -4,6 +4,7 @@ obj-y += tables.o
 obj-y += tables.o
 obj-y += numa.o
 obj-y += osl.o
+obj-y += pmstat.o
 
 obj-$(x86) += hwregs.o
 obj-$(x86) += reboot.o
diff -r 5274aa966231 -r 08374be21318 xen/drivers/acpi/pmstat.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/acpi/pmstat.c Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,155 @@
+/*****************************************************************************
+#  pmstat.c - Power Management statistic information (Px/Cx/Tx, etc.)
+#
+#  Copyright (c) 2008, Liu Jinsong <jinsong.liu@xxxxxxxxx>
+#
+# This program is free software; you can redistribute it and/or modify it 
+# under the terms of the GNU General Public License as published by the Free 
+# Software Foundation; either version 2 of the License, or (at your option) 
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT 
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 
+# Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+#
+# The full GNU General Public License is included in this distribution in the
+# file called LICENSE.
+#
+*****************************************************************************/
+
+#include <xen/config.h>
+#include <xen/lib.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/irq.h>
+#include <xen/iocap.h>
+#include <xen/compat.h>
+#include <xen/guest_access.h>
+#include <asm/current.h>
+#include <public/xen.h>
+#include <xen/cpumask.h>
+#include <asm/processor.h>
+#include <xen/percpu.h>
+#include <xen/domain.h>
+
+#include <public/sysctl.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+struct pm_px *__read_mostly cpufreq_statistic_data[NR_CPUS];
+
+extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
+extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
+extern int pmstat_reset_cx_stat(uint32_t cpuid);
+
+int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
+{
+    int ret = 0;
+    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
+
+    if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
+        return -EINVAL;
+
+    switch ( op->type & PMSTAT_CATEGORY_MASK )
+    {
+    case PMSTAT_CX:
+        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
+            return -ENODEV;
+        break;
+    case PMSTAT_PX:
+        if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+            return -ENODEV;
+        if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
+            return -EINVAL;
+        break;
+    default:
+        return -ENODEV;
+    }
+
+    switch ( op->type )
+    {
+    case PMSTAT_get_max_px:
+    {
+        op->u.getpx.total = pmpt->perf.state_count;
+        break;
+    }
+
+    case PMSTAT_get_pxstat:
+    {
+        uint64_t now, ct;
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+        struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid];
+
+        if ( !pxpt )
+            return -ENODATA;
+
+        total_idle_ns = get_cpu_idle_time(op->cpuid);
+        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+
+        now = NOW();
+        pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
+        pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
+        pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
+        pxpt->prev_state_wall = now;
+        pxpt->prev_idle_wall = total_idle_ns;
+
+        ct = pmpt->perf.state_count;
+        if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
+        {
+            ret = -EFAULT;
+            break;
+        }
+
+        op->u.getpx.total = pxpt->u.total;
+        op->u.getpx.usable = pxpt->u.usable;
+        op->u.getpx.last = pxpt->u.last;
+        op->u.getpx.cur = pxpt->u.cur;
+
+        break;
+    }
+
+    case PMSTAT_reset_pxstat:
+    {
+        cpufreq_statistic_reset(op->cpuid);
+        break;
+    }
+
+    case PMSTAT_get_max_cx:
+    {
+        op->u.getcx.nr = pmstat_get_cx_nr(op->cpuid);
+        ret = 0;
+        break;
+    }
+
+    case PMSTAT_get_cxstat:
+    {
+        ret = pmstat_get_cx_stat(op->cpuid, &op->u.getcx);
+        break;
+    }
+
+    case PMSTAT_reset_cxstat:
+    {
+        ret = pmstat_reset_cx_stat(op->cpuid);
+        break;
+    }
+
+    default:
+        printk("not defined sub-hypercall @ do_get_pm_info\n");
+        ret = -ENOSYS;
+        break;
+    }
+
+    return ret;
+}
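
The residency update in PMSTAT_get_pxstat above (and in
cpufreq_statistic_update() later in this patch) charges the current P-state
with the wall-clock time since the last update minus the idle time
accumulated over the same window, so only busy time counts. A small worked
example of that arithmetic (all timestamps invented):

    /* Worked example of the Px residency arithmetic used above.
     * All timestamps are invented nanosecond values. */
    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t prev_state_wall = 1000000000ULL; /* NOW() at last update */
        uint64_t prev_idle_wall  =  400000000ULL; /* idle total back then */
        uint64_t now             = 1500000000ULL; /* NOW() at this update */
        uint64_t total_idle_ns   =  700000000ULL; /* get_cpu_idle_time() */

        uint64_t tmp_idle_ns = total_idle_ns - prev_idle_wall;
        uint64_t busy_ns = (now - prev_state_wall) - tmp_idle_ns;

        /* 500ms window, 300ms of it idle => 200ms charged to the state */
        printf("residency += %llu ns\n", (unsigned long long)busy_ns);
        return 0;
    }
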
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/Makefile      Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,3 @@
+obj-y += cpufreq.o
+obj-y += cpufreq_ondemand.o
+obj-y += utility.o
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/cpufreq.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/cpufreq.c     Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,188 @@
+/*
+ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@xxxxxxxxx>
+ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@xxxxxxxxx>
+ *  Copyright (C) 2002 - 2004 Dominik Brodowski <linux@xxxxxxxx>
+ *  Copyright (C) 2006        Denis Sadykov <denis.m.sadykov@xxxxxxxxx>
+ *
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *      Add cpufreq limit change handle and per-cpu cpufreq add/del
+ *      to cope with cpu hotplug
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or (at
+ *  your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <xen/types.h>
+#include <xen/errno.h>
+#include <xen/delay.h>
+#include <xen/cpumask.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <xen/xmalloc.h>
+#include <asm/bug.h>
+#include <asm/msr.h>
+#include <asm/io.h>
+#include <asm/config.h>
+#include <asm/processor.h>
+#include <asm/percpu.h>
+#include <asm/cpufeature.h>
+#include <acpi/acpi.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+/* TODO: change to link list later as domain number may be sparse */
+static cpumask_t cpufreq_dom_map[NR_CPUS];
+
+int cpufreq_limit_change(unsigned int cpu)
+{
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+    struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
+    struct cpufreq_policy policy;
+
+    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
+        return -ENODEV;
+
+    if ((perf->platform_limit < 0) || 
+        (perf->platform_limit >= perf->state_count))
+        return -EINVAL;
+
+    memcpy(&policy, data, sizeof(struct cpufreq_policy)); 
+
+    policy.max =
+        perf->states[perf->platform_limit].core_frequency * 1000;
+
+    return __cpufreq_set_policy(data, &policy);
+}
+
+int cpufreq_add_cpu(unsigned int cpu)
+{
+    int ret = 0;
+    unsigned int firstcpu;
+    unsigned int dom;
+    unsigned int j;
+    struct cpufreq_policy new_policy;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+
+    /* to protect the case when Px was not controlled by xen */
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    ret = cpufreq_statistic_init(cpu);
+    if (ret)
+        return ret;
+
+    dom = perf->domain_info.domain;
+    if (cpus_weight(cpufreq_dom_map[dom])) {
+        /* share policy with the first cpu since on same boat */
+        firstcpu = first_cpu(cpufreq_dom_map[dom]);
+        policy = cpufreq_cpu_policy[firstcpu];
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        printk(KERN_EMERG"adding CPU %u\n", cpu);
+    } else {
+        /* for the first cpu, setup policy and do init work */
+        policy = xmalloc(struct cpufreq_policy);
+        if (!policy) {
+            cpufreq_statistic_exit(cpu);
+            return -ENOMEM;
+        }
+        memset(policy, 0, sizeof(struct cpufreq_policy));
+
+        cpufreq_cpu_policy[cpu] = policy;
+        cpu_set(cpu, cpufreq_dom_map[dom]);
+        cpu_set(cpu, policy->cpus);
+
+        policy->cpu = cpu;
+        ret = cpufreq_driver->init(policy);
+        if (ret)
+            goto err1;
+        printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+    }
+
+    /*
+     * After get full cpumap of the coordination domain,
+     * we can safely start gov here.
+     */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors) {
+        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+        policy->governor = NULL;
+        ret = __cpufreq_set_policy(policy, &new_policy);
+        if (ret)
+            goto err2;
+    }
+
+    return 0;
+
+err2:
+    cpufreq_driver->exit(policy);
+err1:
+    for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
+        cpufreq_cpu_policy[j] = NULL;
+        cpufreq_statistic_exit(j);
+    }
+
+    cpus_clear(cpufreq_dom_map[dom]);
+    xfree(policy);
+    return ret;
+}
+
+int cpufreq_del_cpu(unsigned int cpu)
+{
+    unsigned int dom;
+    struct cpufreq_policy *policy;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
+
+    /* to protect the case when Px was not controlled by xen */
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
+        return 0;
+
+    if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
+        return -EINVAL;
+
+    dom = perf->domain_info.domain;
+    policy = cpufreq_cpu_policy[cpu];
+
+    printk(KERN_EMERG"deleting CPU %u\n", cpu);
+
+    /* for the first cpu of the domain, stop gov */
+    if (cpus_weight(cpufreq_dom_map[dom]) ==
+        perf->domain_info.num_processors)
+        __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+
+    cpufreq_cpu_policy[cpu] = NULL;
+    cpu_clear(cpu, policy->cpus);
+    cpu_clear(cpu, cpufreq_dom_map[dom]);
+    cpufreq_statistic_exit(cpu);
+
+    /* for the last cpu of the domain, clean room */
+    /* It's safe here to free freq_table, drv_data and policy */
+    if (!cpus_weight(cpufreq_dom_map[dom])) {
+        cpufreq_driver->exit(policy);
+        xfree(policy);
+    }
+
+    return 0;
+}
+
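cpufreq_add_cpu() above gates governor startup on the P-state coordination
domain being fully populated: the first CPU of a domain allocates the policy,
subsequent CPUs share it, and __cpufreq_set_policy() runs only once
cpus_weight() of the domain map equals domain_info.num_processors. A toy
model of that gating (the 3-CPU domain layout is invented):

    /* Toy model of the coordination-domain gating in cpufreq_add_cpu().
     * A 3-CPU domain is invented for illustration. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int dom_map = 0;              /* cpufreq_dom_map[dom] */
        const unsigned int num_processors = 3; /* domain_info.num_processors */
        const unsigned int cpus[] = { 0, 1, 2 };
        unsigned int i;

        for (i = 0; i < 3; i++) {
            int first = (dom_map == 0);

            dom_map |= 1u << cpus[i];          /* cpu_set() */
            printf("CPU %u %s the domain policy\n", cpus[i],
                   first ? "allocates" : "shares");

            if ((unsigned int)__builtin_popcount(dom_map) == num_processors)
                printf("domain complete: start the governor\n");
        }
        return 0;
    }
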
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/cpufreq_ondemand.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c    Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,246 @@
+/*
+ *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
+ *
+ *  Copyright (C)  2001 Russell King
+ *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>.
+ *                      Jun Nakajima <jun.nakajima@xxxxxxxxx>
+ *             Feb 2008 Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *             Porting cpufreq_ondemand.c from Liunx 2.6.23 to Xen hypervisor 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <xen/types.h>
+#include <xen/percpu.h>
+#include <xen/cpumask.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+#define DEF_FREQUENCY_UP_THRESHOLD              (80)
+
+#define MIN_DBS_INTERVAL                        (MICROSECS(100))
+#define MIN_SAMPLING_MILLISECS                  (20)
+#define MIN_STAT_SAMPLING_RATE                   \
+    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
+#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
+#define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
+
+static uint64_t def_sampling_rate;
+
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
+static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
+
+static unsigned int dbs_enable;    /* number of CPUs using this policy */
+
+static struct dbs_tuners {
+    uint64_t     sampling_rate;
+    unsigned int up_threshold;
+    unsigned int ignore_nice;
+    unsigned int powersave_bias;
+} dbs_tuners_ins = {
+    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
+    .ignore_nice = 0,
+    .powersave_bias = 0,
+};
+
+static struct timer dbs_timer[NR_CPUS];
+
+uint64_t get_cpu_idle_time(unsigned int cpu)
+{
+    uint64_t idle_ns;
+    struct vcpu *v;
+
+    if ((v = idle_vcpu[cpu]) == NULL)
+        return 0;
+
+    idle_ns = v->runstate.time[RUNSTATE_running];
+    if (v->is_running)
+        idle_ns += NOW() - v->runstate.state_entry_time;
+
+    return idle_ns;
+}
+
+static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
+{
+    unsigned int load = 0;
+    uint64_t cur_ns, idle_ns, total_ns;
+
+    struct cpufreq_policy *policy;
+    unsigned int j;
+
+    if (!this_dbs_info->enable)
+        return;
+
+    policy = this_dbs_info->cur_policy;
+
+    if (unlikely(policy->resume)) {
+        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
+        return;
+    }
+
+    cur_ns = NOW();
+    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
+    this_dbs_info->prev_cpu_wall = NOW();
+
+    if (total_ns < MIN_DBS_INTERVAL)
+        return;
+
+    /* Get Idle Time */
+    idle_ns = UINT_MAX;
+    for_each_cpu_mask(j, policy->cpus) {
+        uint64_t total_idle_ns;
+        unsigned int tmp_idle_ns;
+        struct cpu_dbs_info_s *j_dbs_info;
+
+        j_dbs_info = &per_cpu(cpu_dbs_info, j);
+        total_idle_ns = get_cpu_idle_time(j);
+        tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
+        j_dbs_info->prev_cpu_idle = total_idle_ns;
+
+        if (tmp_idle_ns < idle_ns)
+            idle_ns = tmp_idle_ns;
+    }
+
+    if (likely(total_ns > idle_ns))
+        load = (100 * (total_ns - idle_ns)) / total_ns;
+
+    /* Check for frequency increase */
+    if (load > dbs_tuners_ins.up_threshold) {
+        /* if we are already at full speed then break out early */
+        if (policy->cur == policy->max)
+            return;
+        __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
+        return;
+    }
+
+    /* Check for frequency decrease */
+    /* if we cannot reduce the frequency anymore, break out early */
+    if (policy->cur == policy->min)
+        return;
+
+    /*
+     * The optimal frequency is the frequency that is the lowest that
+     * can support the current CPU usage without triggering the up
+     * policy. To be safe, we focus 10 points under the threshold.
+     */
+    if (load < (dbs_tuners_ins.up_threshold - 10)) {
+        unsigned int freq_next, freq_cur;
+
+        freq_cur = __cpufreq_driver_getavg(policy);
+        if (!freq_cur)
+            freq_cur = policy->cur;
+
+        freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
+
+        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+    }
+}
+
+static void do_dbs_timer(void *dbs)
+{
+    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
+
+    if (!dbs_info->enable)
+        return;
+
+    dbs_check_cpu(dbs_info);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
+static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 1;
+
+    init_timer(&dbs_timer[dbs_info->cpu], do_dbs_timer, 
+        (void *)dbs_info, dbs_info->cpu);
+
+    set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+}
+
+static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
+{
+    dbs_info->enable = 0;
+    stop_timer(&dbs_timer[dbs_info->cpu]);
+}
+
+int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
+{
+    unsigned int cpu = policy->cpu;
+    struct cpu_dbs_info_s *this_dbs_info;
+    unsigned int j;
+
+    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
+
+    switch (event) {
+    case CPUFREQ_GOV_START:
+        if ((!cpu_online(cpu)) || (!policy->cur))
+            return -EINVAL;
+
+        if (policy->cpuinfo.transition_latency >
+            (TRANSITION_LATENCY_LIMIT * 1000)) {
+            printk(KERN_WARNING "ondemand governor failed to load "
+                "due to too long transition latency\n");
+            return -EINVAL;
+        }
+        if (this_dbs_info->enable)
+            /* Already enabled */
+            break;
+
+        dbs_enable++;
+
+        for_each_cpu_mask(j, policy->cpus) {
+            struct cpu_dbs_info_s *j_dbs_info;
+            j_dbs_info = &per_cpu(cpu_dbs_info, j);
+            j_dbs_info->cur_policy = policy;
+
+            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
+            j_dbs_info->prev_cpu_wall = NOW();
+        }
+        this_dbs_info->cpu = cpu;
+        /*
+         * Start the timerschedule work, when this governor
+         * is used for first time
+         */
+        if (dbs_enable == 1) {
+            def_sampling_rate = policy->cpuinfo.transition_latency *
+                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
+
+            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
+                def_sampling_rate = MIN_STAT_SAMPLING_RATE;
+
+            dbs_tuners_ins.sampling_rate = def_sampling_rate;
+        }
+        dbs_timer_init(this_dbs_info);
+
+        break;
+
+    case CPUFREQ_GOV_STOP:
+        dbs_timer_exit(this_dbs_info);
+        dbs_enable--;
+
+        break;
+
+    case CPUFREQ_GOV_LIMITS:
+        if (policy->max < this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->max, CPUFREQ_RELATION_H);
+        else if (policy->min > this_dbs_info->cur_policy->cur)
+            __cpufreq_driver_target(this_dbs_info->cur_policy,
+                policy->min, CPUFREQ_RELATION_L);
+        break;
+    }
+    return 0;
+}
+
+struct cpufreq_governor cpufreq_gov_dbs = {
+    .name = "ondemand",
+    .governor = cpufreq_governor_dbs,
+};
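
To make the governor's numbers concrete: dbs_check_cpu() above derives load
as the non-idle share of the sampling window, jumps straight to policy->max
once load exceeds up_threshold (80 by default), and otherwise picks the
lowest frequency that keeps load just under up_threshold - 10. A worked
sketch of that decision (window and frequency values invented):

    /* Worked example of the ondemand decision in dbs_check_cpu().
     * Window and frequency numbers are invented for illustration. */
    #include <stdio.h>
    #include <stdint.h>

    #define UP_THRESHOLD 80  /* DEF_FREQUENCY_UP_THRESHOLD */

    int main(void)
    {
        uint64_t total_ns = 20000000;    /* 20ms sampling window */
        uint64_t idle_ns  = 14000000;    /* 14ms of it idle */
        unsigned int load = 0;

        unsigned int freq_cur = 2000000; /* current avg frequency, kHz */
        unsigned int freq_max = 2400000;

        if (total_ns > idle_ns)
            load = (unsigned int)((100 * (total_ns - idle_ns)) / total_ns);

        printf("load = %u%%\n", load);   /* 30%% for these numbers */

        if (load > UP_THRESHOLD) {
            printf("-> target max (%u kHz)\n", freq_max);
        } else if (load < UP_THRESHOLD - 10) {
            /* lowest frequency that would keep load under threshold-10 */
            unsigned int freq_next = (freq_cur * load) / (UP_THRESHOLD - 10);
            printf("-> target %u kHz (RELATION_L)\n", freq_next);
        }
        return 0;
    }
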
diff -r 5274aa966231 -r 08374be21318 xen/drivers/cpufreq/utility.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/drivers/cpufreq/utility.c     Fri Sep 26 14:04:38 2008 +0100
@@ -0,0 +1,368 @@
+/*
+ *  utility.c - misc functions for cpufreq driver and Px statistic
+ *
+ *  Copyright (C) 2001 Russell King
+ *            (C) 2002 - 2003 Dominik Brodowski <linux@xxxxxxxx>
+ *
+ *  Oct 2005 - Ashok Raj <ashok.raj@xxxxxxxxx>
+ *    Added handling for CPU hotplug
+ *  Feb 2006 - Jacob Shin <jacob.shin@xxxxxxx>
+ *    Fix handling for CPU hotplug -- affected CPUs
+ *  Feb 2008 - Liu Jinsong <jinsong.liu@xxxxxxxxx>
+ *    1. Merge cpufreq.c and freq_table.c of linux 2.6.23
+ *    And poring to Xen hypervisor
+ *    2. some Px statistic interface funcdtions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <xen/errno.h>
+#include <xen/cpumask.h>
+#include <xen/types.h>
+#include <xen/spinlock.h>
+#include <xen/percpu.h>
+#include <xen/types.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <asm/config.h>
+#include <acpi/cpufreq/cpufreq.h>
+#include <public/sysctl.h>
+
+struct cpufreq_driver   *cpufreq_driver;
+struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
+struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
+
+/*********************************************************************
+ *                    Px STATISTIC INFO                              *
+ *********************************************************************/
+
+void cpufreq_statistic_update(cpumask_t cpumask, uint8_t from, uint8_t to)
+{
+    uint32_t i;
+    uint64_t now;
+
+    now = NOW();
+
+    for_each_cpu_mask(i, cpumask) {
+        struct pm_px *pxpt = cpufreq_statistic_data[i];
+        struct processor_pminfo *pmpt = processor_pminfo[i];
+        uint64_t total_idle_ns;
+        uint64_t tmp_idle_ns;
+
+        if ( !pxpt || !pmpt )
+            continue;
+
+        total_idle_ns = get_cpu_idle_time(i);
+        tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+
+        pxpt->u.last = from;
+        pxpt->u.cur = to;
+        pxpt->u.pt[to].count++;
+        pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
+        pxpt->u.pt[from].residency -= tmp_idle_ns;
+
+        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
+
+        pxpt->prev_state_wall = now;
+        pxpt->prev_idle_wall = total_idle_ns;
+    }
+}
+
+int cpufreq_statistic_init(unsigned int cpuid)
+{
+    uint32_t i, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    count = pmpt->perf.state_count;
+
+    if ( !pmpt )
+        return -EINVAL;
+
+    if ( !pxpt )
+    {
+        pxpt = xmalloc(struct pm_px);
+        if ( !pxpt )
+            return -ENOMEM;
+        memset(pxpt, 0, sizeof(*pxpt));
+        cpufreq_statistic_data[cpuid] = pxpt;
+    }
+
+    pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
+    if (!pxpt->u.trans_pt)
+        return -ENOMEM;
+
+    pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
+    if (!pxpt->u.pt) {
+        xfree(pxpt->u.trans_pt);
+        return -ENOMEM;
+    }
+
+    memset(pxpt->u.trans_pt, 0, count * count * (sizeof(uint64_t)));
+    memset(pxpt->u.pt, 0, count * (sizeof(struct pm_px_val)));
+
+    pxpt->u.total = pmpt->perf.state_count;
+    pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
+
+    for (i=0; i < pmpt->perf.state_count; i++)
+        pxpt->u.pt[i].freq = pmpt->perf.states[i].core_frequency;
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+
+    return 0;
+}
+
+void cpufreq_statistic_exit(unsigned int cpuid)
+{
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+
+    if (!pxpt)
+        return;
+    xfree(pxpt->u.trans_pt);
+    xfree(pxpt->u.pt);
+    memset(pxpt, 0, sizeof(struct pm_px));
+}
+
+void cpufreq_statistic_reset(unsigned int cpuid)
+{
+    uint32_t i, j, count;
+    struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+
+    if ( !pxpt || !pmpt )
+        return;
+
+    count = pmpt->perf.state_count;
+
+    for (i=0; i < count; i++) {
+        pxpt->u.pt[i].residency = 0;
+        pxpt->u.pt[i].count = 0;
+
+        for (j=0; j < count; j++)
+            *(pxpt->u.trans_pt + i*count + j) = 0;
+    }
+
+    pxpt->prev_state_wall = NOW();
+    pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+}
+
+
+/*********************************************************************
+ *                   FREQUENCY TABLE HELPERS                         *
+ *********************************************************************/
+
+int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+                                    struct cpufreq_frequency_table *table)
+{
+    unsigned int min_freq = ~0;
+    unsigned int max_freq = 0;
+    unsigned int i;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if (freq < min_freq)
+            min_freq = freq;
+        if (freq > max_freq)
+            max_freq = freq;
+    }
+
+    policy->min = policy->cpuinfo.min_freq = min_freq;
+    policy->max = policy->cpuinfo.max_freq = max_freq;
+
+    if (policy->min == ~0)
+        return -EINVAL;
+    else
+        return 0;
+}
+
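+/*
+ * Clamp the policy to the hardware limits; if no table entry then falls
+ * within [policy->min, policy->max], widen policy->max to the next larger
+ * valid frequency before clamping again.
+ */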
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table)
+{
+    unsigned int next_larger = ~0;
+    unsigned int i;
+    unsigned int count = 0;
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq >= policy->min) && (freq <= policy->max))
+            count++;
+        else if ((next_larger > freq) && (freq > policy->max))
+            next_larger = freq;
+    }
+
+    if (!count)
+        policy->max = next_larger;
+
+    cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq,
+                                 policy->cpuinfo.max_freq);
+
+    return 0;
+}
+
+int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
+                                   struct cpufreq_frequency_table *table,
+                                   unsigned int target_freq,
+                                   unsigned int relation,
+                                   unsigned int *index)
+{
+    struct cpufreq_frequency_table optimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    struct cpufreq_frequency_table suboptimal = {
+        .index = ~0,
+        .frequency = 0,
+    };
+    unsigned int i;
+
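+    /*
+     * Seed the search: for RELATION_H the fallback 'suboptimal' tracks the
+     * lowest frequency above target, so it starts at ~0; for RELATION_L
+     * the preferred 'optimal' tracks the lowest frequency at or above
+     * target, so it starts at ~0 instead.
+     */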
+    switch (relation) {
+    case CPUFREQ_RELATION_H:
+        suboptimal.frequency = ~0;
+        break;
+    case CPUFREQ_RELATION_L:
+        optimal.frequency = ~0;
+        break;
+    }
+
+    if (!cpu_online(policy->cpu))
+        return -EINVAL;
+
+    for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) {
+        unsigned int freq = table[i].frequency;
+        if (freq == CPUFREQ_ENTRY_INVALID)
+            continue;
+        if ((freq < policy->min) || (freq > policy->max))
+            continue;
+        switch(relation) {
+        case CPUFREQ_RELATION_H:
+            if (freq <= target_freq) {
+                if (freq >= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq <= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        case CPUFREQ_RELATION_L:
+            if (freq >= target_freq) {
+                if (freq <= optimal.frequency) {
+                    optimal.frequency = freq;
+                    optimal.index = i;
+                }
+            } else {
+                if (freq >= suboptimal.frequency) {
+                    suboptimal.frequency = freq;
+                    suboptimal.index = i;
+                }
+            }
+            break;
+        }
+    }
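+    /* An index left at ~0 exceeds the scan count i, i.e. nothing matched. */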
+    if (optimal.index > i) {
+        if (suboptimal.index > i)
+            return -EINVAL;
+        *index = suboptimal.index;
+    } else
+        *index = optimal.index;
+
+    return 0;
+}
+
+
+/*********************************************************************
+ *               GOVERNORS                                           *
+ *********************************************************************/
+
+int __cpufreq_driver_target(struct cpufreq_policy *policy,
+                            unsigned int target_freq,
+                            unsigned int relation)
+{
+    int retval = -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->target)
+        retval = cpufreq_driver->target(policy, target_freq, relation);
+
+    return retval;
+}
+
+int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    if (!policy)
+        return -EINVAL;
+
+    if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
+        ret = cpufreq_driver->getavg(policy->cpu);
+
+    return ret;
+}
+
+
+/*********************************************************************
+ *                 POLICY                                            *
+ *********************************************************************/
+
+/*
+ * data   : current policy.
+ * policy : policy to be set.
+ */
+int __cpufreq_set_policy(struct cpufreq_policy *data,
+                                struct cpufreq_policy *policy)
+{
+    int ret = 0;
+
+    memcpy(&policy->cpuinfo, &data->cpuinfo, sizeof(struct cpufreq_cpuinfo));
+
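+    /* Reject a request that raises min above the requested max. */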
+    if (policy->min > data->min && policy->min > policy->max)
+        return -EINVAL;
+
+    /* verify the cpu speed can be set within this limit */
+    ret = cpufreq_driver->verify(policy);
+    if (ret)
+        return ret;
+
+    data->min = policy->min;
+    data->max = policy->max;
+
+    if (policy->governor != data->governor) {
+        /* save old, working values */
+        struct cpufreq_governor *old_gov = data->governor;
+
+        /* end old governor */
+        if (data->governor)
+            __cpufreq_governor(data, CPUFREQ_GOV_STOP);
+
+        /* start new governor */
+        data->governor = policy->governor;
+        if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
+            /* new governor failed, so re-start old one */
+            if (old_gov) {
+                data->governor = old_gov;
+                __cpufreq_governor(data, CPUFREQ_GOV_START);
+            }
+            return -EINVAL;
+        }
+        /* might be a policy change, too, so fall through */
+    }
+
+    return __cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+}
diff -r 5274aa966231 -r 08374be21318 xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h        Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/cpufreq.h        Fri Sep 26 14:04:38 2008 +0100
@@ -20,6 +20,13 @@
 #define CPUFREQ_NAME_LEN 16
 
 struct cpufreq_governor;
+
+struct acpi_cpufreq_data {
+    struct processor_performance *acpi_data;
+    struct cpufreq_frequency_table *freq_table;
+    unsigned int max_freq;
+    unsigned int cpu_feature;
+};
 
 struct cpufreq_cpuinfo {
     unsigned int        max_freq;
diff -r 5274aa966231 -r 08374be21318 xen/include/acpi/cpufreq/processor_perf.h
--- a/xen/include/acpi/cpufreq/processor_perf.h Fri Sep 26 11:12:29 2008 +0100
+++ b/xen/include/acpi/cpufreq/processor_perf.h Fri Sep 26 14:04:38 2008 +0100
@@ -9,10 +9,10 @@ int get_cpu_id(u8);
 int get_cpu_id(u8);
 int powernow_cpufreq_init(void);
 
-void px_statistic_update(cpumask_t, uint8_t, uint8_t);
-int  px_statistic_init(unsigned int);
-void px_statistic_exit(unsigned int);
-void px_statistic_reset(unsigned int);
+void cpufreq_statistic_update(cpumask_t, uint8_t, uint8_t);
+int  cpufreq_statistic_init(unsigned int);
+void cpufreq_statistic_exit(unsigned int);
+void cpufreq_statistic_reset(unsigned int);
 
 int  cpufreq_limit_change(unsigned int);
 
@@ -58,6 +58,6 @@ struct pm_px {
     uint64_t prev_idle_wall;
 };
 
-extern struct pm_px *px_statistic_data[NR_CPUS];
+extern struct pm_px *cpufreq_statistic_data[NR_CPUS];
 
 #endif /* __XEN_PROCESSOR_PM_H__ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
