Enable cpufreq support in Xen for AMD Opteron processors by:
1) Allowing the PowerNow! driver in dom0 to write to the PowerNow!
MSRs.
2) Adding the cpufreq notifier chain to time-xen.c in dom0.
On a frequency change, a platform hypercall is performed to
scale the frequency multiplier in the hypervisor.
3) Adding a platform hypercall to the hypervisor to scale
the frequency multiplier and reset the time stamps so that
the next calibration remains reasonably correct.
4) Adding the cpufreq Xen option which pins the VCPUs to
the physical CPU cores.
Patch 1 covers the frequency scaling platform call in Xen.
Patch 2 allows MSR accesses from the PowerNow! driver.
Patch 3 covers the frequency scaling platform call in Linux.
Patch 4 covers the changes necessary to the PowerNow! driver
to make it correctly associate shared cores under Xen.
This code can be readily expanded to cover Intel or other
non-AMD processors by modifying xen/arch/x86/traps.c to
allow the appropriate MSR accesses.
Caveat: currently, this code does not support the in-kernel
ondemand cpufreq governor. Dom0 must run a userspace
daemon to monitor the utilization of the physical cpus
with the getcpuinfo sysctl hypercall.
Caveat 2: Even though the clock multipliers are being
scaled and recorded correctly in both dom0 and the
hypervisor, time errors appear immediately after a
frequency change. They are not more likely when
the frequency is constant.
Signed-off-by: Mark Langsdorf <mark.langsdorf@xxxxxxx>
diff -r 05c22f282023 arch/i386/kernel/cpu/cpufreq/powernow-k8.c
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Tue Aug 14 16:20:55
2007 +0100
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c Thu Aug 30 12:21:35
2007 -0500
@@ -738,6 +738,7 @@ static int find_psb_table(struct powerno
data->numps = psb->numps;
dprintk("numpstates: 0x%x\n", data->numps);
+ data->starting_core_affinity = cpumask_of_cpu(0);
return fill_powernow_table(data, (struct pst_s *)(psb+1),
maxvid);
}
/*
@@ -758,15 +759,43 @@ static int find_psb_table(struct powerno
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
unsigned int index)
{
- if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE))
+ if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE))
return;
- data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) &
IRT_MASK;
- data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) &
RVO_MASK;
- data->exttype = (data->acpi_data.states[index].control >>
EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
- data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT)
& PLL_L_MASK;
- data->vidmvs = 1 << ((data->acpi_data.states[index].control >>
MVS_SHIFT) & MVS_MASK);
- data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) &
VST_MASK;
+ data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) &
IRT_MASK;
+ data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) &
RVO_MASK;
+ data->exttype = (data->acpi_data->states[index].control >>
EXT_TYPE_SHIFT) & EXT_TYPE_MASK;
+ data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT)
& PLL_L_MASK;
+ data->vidmvs = 1 << ((data->acpi_data->states[index].control >>
MVS_SHIFT) & MVS_MASK);
+ data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) &
VST_MASK;
+}
+
+static struct acpi_processor_performance *acpi_perf_data[NR_CPUS];
+static int preregister_valid = 0;
+
+static int powernow_k8_cpu_preinit_acpi()
+{
+ int i;
+ struct acpi_processor_performance *data;
+ for_each_possible_cpu(i) {
+ data = kzalloc(sizeof(struct acpi_processor_performance),
+ GFP_KERNEL);
+ if (!data) {
+ int j;
+ for_each_possible_cpu(j) {
+ kfree(acpi_perf_data[j]);
+ acpi_perf_data[j] = NULL;
+ }
+ return -ENODEV;
+ }
+ acpi_perf_data[i] = data;
+ }
+
+ if (acpi_processor_preregister_performance(acpi_perf_data))
+ return -ENODEV;
+ else
+ preregister_valid = 1;
+ return 0;
}
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
@@ -774,28 +803,29 @@ static int powernow_k8_cpu_init_acpi(str
struct cpufreq_frequency_table *powernow_table;
int ret_val;
- if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
+ data->acpi_data = acpi_perf_data[data->cpu];
+ if (acpi_processor_register_performance(data->acpi_data, data->cpu)) {
dprintk("register performance failed: bad ACPI data\n");
return -EIO;
}
/* verify the data contained in the ACPI structures */
- if (data->acpi_data.state_count <= 1) {
+ if (data->acpi_data->state_count <= 1) {
dprintk("No ACPI P-States\n");
goto err_out;
}
- if ((data->acpi_data.control_register.space_id !=
ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data.status_register.space_id !=
ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+ if ((data->acpi_data->control_register.space_id !=
ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+ (data->acpi_data->status_register.space_id !=
ACPI_ADR_SPACE_FIXED_HARDWARE)) {
dprintk("Invalid control/status registers (%x - %x)\n",
- data->acpi_data.control_register.space_id,
- data->acpi_data.status_register.space_id);
+ data->acpi_data->control_register.space_id,
+ data->acpi_data->status_register.space_id);
goto err_out;
}
/* fill in data->powernow_table */
powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table)
- * (data->acpi_data.state_count + 1)), GFP_KERNEL);
+ * (data->acpi_data->state_count + 1)), GFP_KERNEL);
if (!powernow_table) {
dprintk("powernow_table memory alloc failure\n");
goto err_out;
@@ -808,28 +838,43 @@ static int powernow_k8_cpu_init_acpi(str
if (ret_val)
goto err_out_mem;
- powernow_table[data->acpi_data.state_count].frequency =
CPUFREQ_TABLE_END;
- powernow_table[data->acpi_data.state_count].index = 0;
+ powernow_table[data->acpi_data->state_count].frequency =
CPUFREQ_TABLE_END;
+ powernow_table[data->acpi_data->state_count].index = 0;
data->powernow_table = powernow_table;
/* fill in data */
- data->numps = data->acpi_data.state_count;
+ data->numps = data->acpi_data->state_count;
print_basics(data);
powernow_k8_acpi_pst_values(data, 0);
/* notify BIOS that we exist */
acpi_processor_notify_smm(THIS_MODULE);
+ /* determine affinity, from ACPI if available */
+ if (preregister_valid) {
+ if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) ||
+ (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY))
+ data->starting_core_affinity =
data->acpi_data->shared_cpu_map;
+ else
+ data->starting_core_affinity =
cpumask_of_cpu(data->cpu);
+ } else {
+ /* best guess from family if not */
+ if (cpu_family == CPU_HW_PSTATE)
+ data->starting_core_affinity =
cpumask_of_cpu(data->cpu);
+ else
+ data->starting_core_affinity = cpu_core_map[data->cpu];
+ }
+
return 0;
err_out_mem:
kfree(powernow_table);
err_out:
- acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
-
- /* data->acpi_data.state_count informs us at ->exit() whether ACPI was
used */
- data->acpi_data.state_count = 0;
+ acpi_processor_unregister_performance(data->acpi_data, data->cpu);
+
+ /* data->acpi_data->state_count informs us at ->exit() whether ACPI was
used */
+ data->acpi_data->state_count = 0;
return -ENODEV;
}
@@ -838,13 +883,13 @@ static int fill_powernow_table_pstate(st
{
int i;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 index;
u32 hi = 0, lo = 0;
u32 fid;
u32 did;
- index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
+ index = data->acpi_data->states[i].control & HW_PSTATE_MASK;
if (index > MAX_HW_PSTATE) {
printk(KERN_ERR PFX "invalid pstate %d - bad value
%d.\n", i, index);
printk(KERN_ERR PFX "Please report to BIOS
manufacturer\n");
@@ -865,10 +910,10 @@ static int fill_powernow_table_pstate(st
powernow_table[i].frequency = find_khz_freq_from_fiddid(fid,
did);
- if (powernow_table[i].frequency !=
(data->acpi_data.states[i].core_frequency * 1000)) {
+ if (powernow_table[i].frequency !=
(data->acpi_data->states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries %u kHz vs.
%u kHz\n",
powernow_table[i].frequency,
- (unsigned int)
(data->acpi_data.states[i].core_frequency * 1000));
+ (unsigned int)
(data->acpi_data->states[i].core_frequency * 1000));
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
@@ -880,16 +925,16 @@ static int fill_powernow_table_fidvid(st
{
int i;
int cntlofreq = 0;
- for (i = 0; i < data->acpi_data.state_count; i++) {
+ for (i = 0; i < data->acpi_data->state_count; i++) {
u32 fid;
u32 vid;
if (data->exttype) {
- fid = data->acpi_data.states[i].status & EXT_FID_MASK;
- vid = (data->acpi_data.states[i].status >> VID_SHIFT) &
EXT_VID_MASK;
+ fid = data->acpi_data->states[i].status & EXT_FID_MASK;
+ vid = (data->acpi_data->states[i].status >> VID_SHIFT)
& EXT_VID_MASK;
} else {
- fid = data->acpi_data.states[i].control & FID_MASK;
- vid = (data->acpi_data.states[i].control >> VID_SHIFT)
& VID_MASK;
+ fid = data->acpi_data->states[i].control & FID_MASK;
+ vid = (data->acpi_data->states[i].control >> VID_SHIFT)
& VID_MASK;
}
dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid);
@@ -930,10 +975,10 @@ static int fill_powernow_table_fidvid(st
cntlofreq = i;
}
- if (powernow_table[i].frequency !=
(data->acpi_data.states[i].core_frequency * 1000)) {
+ if (powernow_table[i].frequency !=
(data->acpi_data->states[i].core_frequency * 1000)) {
printk(KERN_INFO PFX "invalid freq entries %u kHz vs.
%u kHz\n",
powernow_table[i].frequency,
- (unsigned int)
(data->acpi_data.states[i].core_frequency * 1000));
+ (unsigned int)
(data->acpi_data->states[i].core_frequency * 1000));
powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID;
continue;
}
@@ -943,14 +988,15 @@ static int fill_powernow_table_fidvid(st
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
{
- if (data->acpi_data.state_count)
- acpi_processor_unregister_performance(&data->acpi_data,
data->cpu);
+ if (data->acpi_data->state_count)
+ acpi_processor_unregister_performance(data->acpi_data,
data->cpu);
}
#else
static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return
-ENODEV; }
static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return;
}
static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data,
unsigned int index) { return; }
+static int powernow_k8_cpu_preinit_acpi() { return -ENODEV; }
#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
/* Take a frequency, and issue the fid/vid transition command */
@@ -1164,7 +1210,7 @@ static int __cpuinit powernowk8_cpu_init
* an UP version, and is deprecated by AMD.
*/
if (num_online_cpus() != 1) {
- printk(KERN_ERR PFX "MP systems not supported by PSB
BIOS structure\n");
+ printk(KERN_ERR PFX "Your BIOS does not provide _PSS
objects. PowerNow! does not work on SMP systems without _PSS objects.
Complain to your BIOS vendor.\n");
kfree(data);
return -ENODEV;
}
@@ -1204,10 +1250,7 @@ static int __cpuinit powernowk8_cpu_init
set_cpus_allowed(current, oldmask);
pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
- if (cpu_family == CPU_HW_PSTATE)
- pol->cpus = cpumask_of_cpu(pol->cpu);
- else
- pol->cpus = cpu_core_map[pol->cpu];
+ pol->cpus = data->starting_core_affinity;
data->available_cores = &(pol->cpus);
/* Take a crude guess here.
@@ -1323,6 +1366,7 @@ static int __cpuinit powernowk8_init(voi
}
if (supported_cpus == num_online_cpus()) {
+ powernow_k8_cpu_preinit_acpi();
printk(KERN_INFO PFX "Found %d %s "
"processors (" VERSION ")\n", supported_cpus,
boot_cpu_data.x86_model_id);
diff -r 05c22f282023 arch/i386/kernel/cpu/cpufreq/powernow-k8.h
--- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h Tue Aug 14 16:20:55
2007 +0100
+++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h Thu Aug 30 12:21:35
2007 -0500
@@ -32,12 +32,13 @@ struct powernow_k8_data {
#ifdef CONFIG_X86_POWERNOW_K8_ACPI
/* the acpi table needs to be kept. it's only available if ACPI was
* used to determine valid frequency/vid/fid states */
- struct acpi_processor_performance acpi_data;
+ struct acpi_processor_performance *acpi_data;
#endif
/* we need to keep track of associated cores, but let cpufreq
* handle hotplug events - so just point at cpufreq pol->cpus
* structure */
cpumask_t *available_cores;
+ cpumask_t starting_core_affinity;
};
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|