# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1239704455 -3600
# Node ID 0108af6efdaeec041676f2dc00ad8327e95f1267
# Parent e15d30dfb6003e10b1cc4189d7c25fb3a53ac4d1
Fix cpufreq HW-ALL coordination handle
Currently cpufreq HW-ALL coordination is handled the same way as SW-ALL.
However, SW-ALL brings more IPIs, which is bad for cpuidle.
This patch implements HW-ALL coordination differently from SW-ALL, for
the sake of performance and to reduce IPIs. We also suspend/resume the
HW-ALL dbs timer around idle.
Signed-off-by: Yu, Ke <ke.yu@xxxxxxxxx>
Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>
Signed-off-by: Tian, Kevin <kevin.tian@xxxxxxxxx>
---
xen/arch/x86/acpi/cpu_idle.c | 7 +
xen/arch/x86/acpi/cpufreq/cpufreq.c | 6 +
xen/drivers/cpufreq/cpufreq.c | 149 +++++++++++++++++----------------
xen/drivers/cpufreq/cpufreq_ondemand.c | 41 +++++++++
xen/include/acpi/cpufreq/cpufreq.h | 4
5 files changed, 136 insertions(+), 71 deletions(-)
diff -r e15d30dfb600 -r 0108af6efdae xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c Tue Apr 14 11:20:55 2009 +0100
@@ -47,6 +47,7 @@
#include <asm/processor.h>
#include <public/platform.h>
#include <public/sysctl.h>
+#include <acpi/cpufreq/cpufreq.h>
/*#define DEBUG_PM_CX*/
@@ -195,6 +196,8 @@ static void acpi_processor_idle(void)
int sleep_ticks = 0;
u32 t1, t2 = 0;
+ cpufreq_dbs_timer_suspend();
+
sched_tick_suspend();
/*
* sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
@@ -214,6 +217,7 @@ static void acpi_processor_idle(void)
{
local_irq_enable();
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
return;
}
@@ -234,6 +238,7 @@ static void acpi_processor_idle(void)
else
acpi_safe_halt();
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
return;
}
@@ -341,6 +346,7 @@ static void acpi_processor_idle(void)
default:
local_irq_enable();
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
return;
}
@@ -352,6 +358,7 @@ static void acpi_processor_idle(void)
}
sched_tick_resume();
+ cpufreq_dbs_timer_resume();
if ( cpuidle_current_governor->reflect )
cpuidle_current_governor->reflect(power);
diff -r e15d30dfb600 -r 0108af6efdae xen/arch/x86/acpi/cpufreq/cpufreq.c
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c Tue Apr 14 11:20:55 2009 +0100
@@ -191,7 +191,11 @@ static void drv_read(struct drv_cmd *cmd
static void drv_write(struct drv_cmd *cmd)
{
- on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
+ if ((cpus_weight(cmd->mask) == 1) &&
+ cpu_isset(smp_processor_id(), cmd->mask))
+ do_drv_write((void *)cmd);
+ else
+ on_selected_cpus( cmd->mask, do_drv_write, (void *)cmd, 0, 0);
}
static u32 get_cur_val(cpumask_t mask)
diff -r e15d30dfb600 -r 0108af6efdae xen/drivers/cpufreq/cpufreq.c
--- a/xen/drivers/cpufreq/cpufreq.c Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/drivers/cpufreq/cpufreq.c Tue Apr 14 11:20:55 2009 +0100
@@ -130,7 +130,7 @@ int cpufreq_add_cpu(unsigned int cpu)
int ret = 0;
unsigned int firstcpu;
unsigned int dom, domexist = 0;
- unsigned int j;
+ unsigned int hw_all = 0;
struct list_head *pos;
struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy new_policy;
@@ -146,9 +146,8 @@ int cpufreq_add_cpu(unsigned int cpu)
if (cpufreq_cpu_policy[cpu])
return 0;
- ret = cpufreq_statistic_init(cpu);
- if (ret)
- return ret;
+ if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+ hw_all = 1;
dom = perf->domain_info.domain;
@@ -160,61 +159,57 @@ int cpufreq_add_cpu(unsigned int cpu)
}
}
- if (domexist) {
- /* share policy with the first cpu since on same boat */
+ if (!domexist) {
+ cpufreq_dom = xmalloc(struct cpufreq_dom);
+ if (!cpufreq_dom)
+ return -ENOMEM;
+
+ memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
+ cpufreq_dom->dom = dom;
+ list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
+ } else {
+ /* domain sanity check under whatever coordination type */
+ firstcpu = first_cpu(cpufreq_dom->map);
+ if ((perf->domain_info.coord_type !=
+ processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
+ (perf->domain_info.num_processors !=
+ processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
+ return -EINVAL;
+ }
+ }
+
+ if (!domexist || hw_all) {
+ policy = xmalloc(struct cpufreq_policy);
+ if (!policy)
+ ret = -ENOMEM;
+
+ memset(policy, 0, sizeof(struct cpufreq_policy));
+ policy->cpu = cpu;
+ cpufreq_cpu_policy[cpu] = policy;
+
+ ret = cpufreq_driver->init(policy);
+ if (ret) {
+ xfree(policy);
+ return ret;
+ }
+ printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
+ } else {
firstcpu = first_cpu(cpufreq_dom->map);
policy = cpufreq_cpu_policy[firstcpu];
cpufreq_cpu_policy[cpu] = policy;
- cpu_set(cpu, cpufreq_dom->map);
- cpu_set(cpu, policy->cpus);
-
- /* domain coordination sanity check */
- if ((perf->domain_info.coord_type !=
- processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
- (perf->domain_info.num_processors !=
- processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
- ret = -EINVAL;
- goto err2;
- }
-
printk(KERN_EMERG"adding CPU %u\n", cpu);
- } else {
- cpufreq_dom = xmalloc(struct cpufreq_dom);
- if (!cpufreq_dom) {
- cpufreq_statistic_exit(cpu);
- return -ENOMEM;
- }
- memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
- cpufreq_dom->dom = dom;
- cpu_set(cpu, cpufreq_dom->map);
- list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
-
- /* for the first cpu, setup policy and do init work */
- policy = xmalloc(struct cpufreq_policy);
- if (!policy) {
- list_del(&cpufreq_dom->node);
- xfree(cpufreq_dom);
- cpufreq_statistic_exit(cpu);
- return -ENOMEM;
- }
- memset(policy, 0, sizeof(struct cpufreq_policy));
- policy->cpu = cpu;
- cpu_set(cpu, policy->cpus);
- cpufreq_cpu_policy[cpu] = policy;
-
- ret = cpufreq_driver->init(policy);
- if (ret)
- goto err1;
- printk(KERN_EMERG"CPU %u initialization completed\n", cpu);
- }
-
- /*
- * After get full cpumap of the coordination domain,
- * we can safely start gov here.
- */
- if (cpus_weight(cpufreq_dom->map) ==
- perf->domain_info.num_processors) {
+ }
+
+ cpu_set(cpu, policy->cpus);
+ cpu_set(cpu, cpufreq_dom->map);
+
+ ret = cpufreq_statistic_init(cpu);
+ if (ret)
+ goto err1;
+
+ if (hw_all ||
+ (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors)) {
memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
policy->governor = NULL;
@@ -240,22 +235,29 @@ int cpufreq_add_cpu(unsigned int cpu)
return 0;
err2:
- cpufreq_driver->exit(policy);
+ cpufreq_statistic_exit(cpu);
err1:
- for_each_cpu_mask(j, cpufreq_dom->map) {
- cpufreq_cpu_policy[j] = NULL;
- cpufreq_statistic_exit(j);
- }
-
- list_del(&cpufreq_dom->node);
- xfree(cpufreq_dom);
- xfree(policy);
+ cpufreq_cpu_policy[cpu] = NULL;
+ cpu_clear(cpu, policy->cpus);
+ cpu_clear(cpu, cpufreq_dom->map);
+
+ if (cpus_empty(policy->cpus)) {
+ cpufreq_driver->exit(policy);
+ xfree(policy);
+ }
+
+ if (cpus_empty(cpufreq_dom->map)) {
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
+ }
+
return ret;
}
int cpufreq_del_cpu(unsigned int cpu)
{
unsigned int dom, domexist = 0;
+ unsigned int hw_all = 0;
struct list_head *pos;
struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy *policy;
@@ -269,6 +271,9 @@ int cpufreq_del_cpu(unsigned int cpu)
if (!cpufreq_cpu_policy[cpu])
return 0;
+
+ if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
+ hw_all = 1;
dom = perf->domain_info.domain;
policy = cpufreq_cpu_policy[cpu];
@@ -284,23 +289,27 @@ int cpufreq_del_cpu(unsigned int cpu)
if (!domexist)
return -EINVAL;
- /* for the first cpu of the domain, stop gov */
- if (cpus_weight(cpufreq_dom->map) ==
- perf->domain_info.num_processors)
+ /* for HW_ALL, stop gov for each core of the _PSD domain */
+ /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
+ if (hw_all ||
+ (cpus_weight(cpufreq_dom->map) == perf->domain_info.num_processors))
__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
+ cpufreq_statistic_exit(cpu);
cpufreq_cpu_policy[cpu] = NULL;
cpu_clear(cpu, policy->cpus);
cpu_clear(cpu, cpufreq_dom->map);
- cpufreq_statistic_exit(cpu);
+
+ if (cpus_empty(policy->cpus)) {
+ cpufreq_driver->exit(policy);
+ xfree(policy);
+ }
/* for the last cpu of the domain, clean room */
/* It's safe here to free freq_table, drv_data and policy */
- if (!cpus_weight(cpufreq_dom->map)) {
- cpufreq_driver->exit(policy);
+ if (cpus_empty(cpufreq_dom->map)) {
list_del(&cpufreq_dom->node);
xfree(cpufreq_dom);
- xfree(policy);
}
printk(KERN_EMERG"deleting CPU %u\n", cpu);
diff -r e15d30dfb600 -r 0108af6efdae xen/drivers/cpufreq/cpufreq_ondemand.c
--- a/xen/drivers/cpufreq/cpufreq_ondemand.c Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/drivers/cpufreq/cpufreq_ondemand.c Tue Apr 14 11:20:55 2009 +0100
@@ -190,6 +190,12 @@ static void dbs_timer_init(struct cpu_db
(void *)dbs_info, dbs_info->cpu);
set_timer(&dbs_timer[dbs_info->cpu], NOW()+dbs_tuners_ins.sampling_rate);
+
+ if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
+ == CPUFREQ_SHARED_TYPE_HW )
+ {
+ dbs_info->stoppable = 1;
+ }
}
static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
@@ -337,3 +343,38 @@ static void __exit cpufreq_gov_dbs_exit(
cpufreq_unregister_governor(&cpufreq_gov_dbs);
}
__exitcall(cpufreq_gov_dbs_exit);
+
+void cpufreq_dbs_timer_suspend(void)
+{
+ int cpu;
+
+ cpu = smp_processor_id();
+
+ if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+ {
+ stop_timer( &dbs_timer[cpu] );
+ }
+}
+
+void cpufreq_dbs_timer_resume(void)
+{
+ int cpu;
+ struct timer* t;
+ s_time_t now;
+
+ cpu = smp_processor_id();
+
+ if ( per_cpu(cpu_dbs_info,cpu).stoppable )
+ {
+ now = NOW();
+ t = &dbs_timer[cpu];
+ if (t->expires <= now)
+ {
+ t->function(t->data);
+ }
+ else
+ {
+ set_timer(t, align_timer(now , dbs_tuners_ins.sampling_rate));
+ }
+ }
+}
diff -r e15d30dfb600 -r 0108af6efdae xen/include/acpi/cpufreq/cpufreq.h
--- a/xen/include/acpi/cpufreq/cpufreq.h Tue Apr 14 11:20:02 2009 +0100
+++ b/xen/include/acpi/cpufreq/cpufreq.h Tue Apr 14 11:20:55 2009 +0100
@@ -221,6 +221,7 @@ struct cpu_dbs_info_s {
struct cpufreq_frequency_table *freq_table;
int cpu;
unsigned int enable:1;
+ unsigned int stoppable:1;
};
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
@@ -232,4 +233,7 @@ int write_ondemand_up_threshold(unsigned
int write_ondemand_up_threshold(unsigned int up_threshold);
int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq);
+
+void cpufreq_dbs_timer_suspend(void);
+void cpufreq_dbs_timer_resume(void);
#endif /* __XEN_CPUFREQ_PM_H__ */
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|