To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH] x86/cpufreq: don't use static array for large per-CPU data structures
From: "Jan Beulich" <jbeulich@xxxxxxxxxx>
Date: Fri, 19 Sep 2008 14:09:50 +0100
Delivery-date: Fri, 19 Sep 2008 06:09:36 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
... as this is rather wasteful when Xen is configured to support many
CPUs but is running on systems having only a few.
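In other words (a minimal sketch with made-up names; it uses standard malloc()/memset() where the hypervisor code below uses xmalloc(), and is only an illustration of the pattern, not the actual Xen interfaces): each static NR_CPUS-sized array of large structures becomes an array of pointers that is populated on demand, and every consumer gains a check for a still-unpopulated slot.

    #include <stdlib.h>
    #include <string.h>

    #define NR_CPUS 256                     /* build-time maximum */

    struct big_state {                      /* stand-in for processor_pminfo & co. */
        unsigned long data[1024];
    };

    /* Before: memory for all NR_CPUS entries is reserved unconditionally:
     *     static struct big_state per_cpu_state[NR_CPUS];
     * After: only one pointer per CPU; the structure is allocated lazily.
     */
    static struct big_state *per_cpu_state[NR_CPUS];

    static int register_cpu_state(unsigned int cpu)
    {
        struct big_state *st;

        if (cpu >= NR_CPUS)
            return -1;
        st = per_cpu_state[cpu];
        if (!st) {
            st = malloc(sizeof(*st));       /* xmalloc() in the patch */
            if (!st)
                return -1;                  /* -ENOMEM in the patch */
            memset(st, 0, sizeof(*st));
            per_cpu_state[cpu] = st;
        }
        /* ... fill in *st for this CPU ... */
        return 0;
    }

    static int use_cpu_state(unsigned int cpu)
    {
        /* Consumers must now tolerate a CPU that never registered. */
        if (cpu >= NR_CPUS || !per_cpu_state[cpu])
            return -1;                      /* -EINVAL/-ENODEV in the patch */
        /* ... read per_cpu_state[cpu] ... */
        return 0;
    }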

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2008-09-19/xen/arch/x86/acpi/cpufreq/cpufreq.c
===================================================================
--- 2008-09-19.orig/xen/arch/x86/acpi/cpufreq/cpufreq.c 2008-09-19 14:28:30.000000000 +0200
+++ 2008-09-19/xen/arch/x86/acpi/cpufreq/cpufreq.c      2008-09-19 14:32:34.000000000 +0200
@@ -389,12 +389,15 @@ static int acpi_cpufreq_target(struct cp
 
 static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
 {
-    struct acpi_cpufreq_data *data = drv_data[policy->cpu];
-    struct processor_performance *perf = &processor_pminfo[policy->cpu].perf;
+    struct acpi_cpufreq_data *data;
+    struct processor_performance *perf;
 
-    if (!policy || !data)
+    if (!policy || !(data = drv_data[policy->cpu]) ||
+        !processor_pminfo[policy->cpu])
         return -EINVAL;
 
+    perf = &processor_pminfo[policy->cpu]->perf;
+
     cpufreq_verify_within_limits(policy, 0, 
         perf->states[perf->platform_limit].core_frequency * 1000);
 
@@ -447,7 +450,7 @@ acpi_cpufreq_cpu_init(struct cpufreq_pol
 
     drv_data[cpu] = data;
 
-    data->acpi_data = &processor_pminfo[cpu].perf;
+    data->acpi_data = &processor_pminfo[cpu]->perf;
 
     perf = data->acpi_data;
     policy->shared_type = perf->shared_type;
@@ -580,11 +583,11 @@ static struct cpufreq_driver acpi_cpufre
 
 int cpufreq_limit_change(unsigned int cpu)
 {
-    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
     struct cpufreq_policy *data = cpufreq_cpu_policy[cpu];
     struct cpufreq_policy policy;
 
-    if (!cpu_online(cpu) || !data)
+    if (!cpu_online(cpu) || !data || !processor_pminfo[cpu])
         return -ENODEV;
 
     if ((perf->platform_limit < 0) || 
@@ -607,10 +610,10 @@ int cpufreq_add_cpu(unsigned int cpu)
     unsigned int j;
     struct cpufreq_policy new_policy;
     struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
 
     /* to protect the case when Px was not controlled by xen */
-    if (!(perf->init & XEN_PX_INIT))
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
         return 0;
 
     if (cpu_is_offline(cpu) || cpufreq_cpu_policy[cpu])
@@ -683,10 +686,10 @@ int cpufreq_del_cpu(unsigned int cpu)
 {
     unsigned int dom;
     struct cpufreq_policy *policy;
-    struct processor_performance *perf = &processor_pminfo[cpu].perf;
+    struct processor_performance *perf = &processor_pminfo[cpu]->perf;
 
     /* to protect the case when Px was not controlled by xen */
-    if (!(perf->init & XEN_PX_INIT))
+    if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
         return 0;
 
     if (cpu_is_offline(cpu) || !cpufreq_cpu_policy[cpu])
Index: 2008-09-19/xen/arch/x86/acpi/cpufreq/powernow.c
===================================================================
--- 2008-09-19.orig/xen/arch/x86/acpi/cpufreq/powernow.c        2008-09-19 14:28:30.000000000 +0200
+++ 2008-09-19/xen/arch/x86/acpi/cpufreq/powernow.c     2008-09-19 13:44:18.000000000 +0200
@@ -49,9 +49,6 @@
 #define MSR_PSTATE_CTRL         0xc0010062 /* Pstate control MSR */
 #define MSR_PSTATE_CUR_LIMIT    0xc0010061 /* pstate current limit MSR */
 
-extern struct processor_pminfo processor_pminfo[NR_CPUS];
-extern struct cpufreq_policy *cpufreq_cpu_policy[NR_CPUS];
-
 struct powernow_cpufreq_data {
     struct processor_performance *acpi_data;
     struct cpufreq_frequency_table *freq_table;
@@ -149,7 +146,7 @@ static int powernow_cpufreq_cpu_init(str
 
     drv_data[cpu] = data;
 
-    data->acpi_data = &processor_pminfo[cpu].perf;
+    data->acpi_data = &processor_pminfo[cpu]->perf;
 
     perf = data->acpi_data;
     policy->shared_type = perf->shared_type;
@@ -257,8 +254,8 @@ int powernow_cpufreq_init(void)
        }
         if (ret)
             return ret;
-        if (max_dom < processor_pminfo[i].perf.domain_info.domain)
-            max_dom = processor_pminfo[i].perf.domain_info.domain;
+        if (max_dom < processor_pminfo[i]->perf.domain_info.domain)
+            max_dom = processor_pminfo[i]->perf.domain_info.domain;
     }
     max_dom++;
 
@@ -274,13 +271,13 @@ int powernow_cpufreq_init(void)
 
     /* get cpumask of each psd domain */
     for_each_online_cpu(i) {
-        __set_bit(processor_pminfo[i].perf.domain_info.domain, dom_mask);
-        cpu_set(i, pt[processor_pminfo[i].perf.domain_info.domain]);
+        __set_bit(processor_pminfo[i]->perf.domain_info.domain, dom_mask);
+        cpu_set(i, pt[processor_pminfo[i]->perf.domain_info.domain]);
     }
 
     for_each_online_cpu(i)
-        processor_pminfo[i].perf.shared_cpu_map = 
-            pt[processor_pminfo[i].perf.domain_info.domain];
+        processor_pminfo[i]->perf.shared_cpu_map =
+            pt[processor_pminfo[i]->perf.domain_info.domain];
 
     cpufreq_driver = &powernow_cpufreq_driver;
 
Index: 2008-09-19/xen/arch/x86/acpi/cpufreq/utility.c
===================================================================
--- 2008-09-19.orig/xen/arch/x86/acpi/cpufreq/utility.c 2008-09-19 14:28:30.000000000 +0200
+++ 2008-09-19/xen/arch/x86/acpi/cpufreq/utility.c      2008-09-19 13:54:43.000000000 +0200
@@ -32,8 +32,8 @@
 #include <public/sysctl.h>
 
 struct cpufreq_driver   *cpufreq_driver;
-struct processor_pminfo processor_pminfo[NR_CPUS];
-struct cpufreq_policy   *cpufreq_cpu_policy[NR_CPUS];
+struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
+struct cpufreq_policy   *__read_mostly cpufreq_cpu_policy[NR_CPUS];
 
 /*********************************************************************
  *                    Px STATISTIC INFO                              *
@@ -47,11 +47,14 @@ void px_statistic_update(cpumask_t cpuma
     now = NOW();
 
     for_each_cpu_mask(i, cpumask) {
-        struct pm_px *pxpt = &px_statistic_data[i];
-        uint32_t statnum = processor_pminfo[i].perf.state_count;
+        struct pm_px *pxpt = px_statistic_data[i];
+        struct processor_pminfo *pmpt = processor_pminfo[i];
         uint64_t total_idle_ns;
         uint64_t tmp_idle_ns;
 
+        if ( !pxpt || !pmpt )
+            continue;
+
         total_idle_ns = get_cpu_idle_time(i);
         tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
 
@@ -61,7 +64,7 @@ void px_statistic_update(cpumask_t cpuma
         pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
         pxpt->u.pt[from].residency -= tmp_idle_ns;
 
-        (*(pxpt->u.trans_pt + from*statnum + to))++;
+        (*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
 
         pxpt->prev_state_wall = now;
         pxpt->prev_idle_wall = total_idle_ns;
@@ -71,11 +74,23 @@ void px_statistic_update(cpumask_t cpuma
 int px_statistic_init(unsigned int cpuid)
 {
     uint32_t i, count;
-    struct pm_px *pxpt = &px_statistic_data[cpuid];
-    struct processor_pminfo *pmpt = &processor_pminfo[cpuid];
+    struct pm_px *pxpt = px_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
 
-    count = pmpt->perf.state_count;
+    if ( !pmpt )
+        return -EINVAL;
 
+    count = pmpt->perf.state_count;
+
+    if ( !pxpt )
+    {
+        pxpt = xmalloc(struct pm_px);
+        if ( !pxpt )
+            return -ENOMEM;
+        memset(pxpt, 0, sizeof(*pxpt));
+        px_statistic_data[cpuid] = pxpt;
+    }
+
     pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
     if (!pxpt->u.trans_pt)
         return -ENOMEM;
@@ -103,8 +118,10 @@ int px_statistic_init(unsigned int cpuid
 
 void px_statistic_exit(unsigned int cpuid)
 {
-    struct pm_px *pxpt = &px_statistic_data[cpuid];
+    struct pm_px *pxpt = px_statistic_data[cpuid];
 
+    if (!pxpt)
+        return;
     xfree(pxpt->u.trans_pt);
     xfree(pxpt->u.pt);
     memset(pxpt, 0, sizeof(struct pm_px));
@@ -113,9 +130,13 @@ void px_statistic_exit(unsigned int cpui
 void px_statistic_reset(unsigned int cpuid)
 {
     uint32_t i, j, count;
-    struct pm_px *pxpt = &px_statistic_data[cpuid];
+    struct pm_px *pxpt = px_statistic_data[cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
 
-    count = processor_pminfo[cpuid].perf.state_count;
+    if ( !pxpt || !pmpt )
+        return;
+
+    count = pmpt->perf.state_count;
 
     for (i=0; i < count; i++) {
         pxpt->u.pt[i].residency = 0;
Index: 2008-09-19/xen/arch/x86/acpi/cpu_idle.c
===================================================================
--- 2008-09-19.orig/xen/arch/x86/acpi/cpu_idle.c        2008-09-19 14:28:30.000000000 +0200
+++ 2008-09-19/xen/arch/x86/acpi/cpu_idle.c     2008-09-19 13:51:21.000000000 +0200
@@ -64,7 +64,7 @@ static void (*pm_idle_save) (void) __rea
 unsigned int max_cstate __read_mostly = 2;
 integer_param("max_cstate", max_cstate);
 
-static struct acpi_processor_power processor_powers[NR_CPUS];
+static struct acpi_processor_power *__read_mostly processor_powers[NR_CPUS];
 
 static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
 {
@@ -93,8 +93,11 @@ static void print_acpi_power(uint32_t cp
 
 static void dump_cx(unsigned char key)
 {
-    for( int i = 0; i < num_online_cpus(); i++ )
-        print_acpi_power(i, &processor_powers[i]);
+    unsigned int cpu;
+
+    for_each_online_cpu ( cpu )
+        if (processor_powers[cpu])
+            print_acpi_power(cpu, processor_powers[cpu]);
 }
 
 static int __init cpu_idle_key_init(void)
@@ -195,14 +198,12 @@ static struct {
 
 static void acpi_processor_idle(void)
 {
-    struct acpi_processor_power *power = NULL;
+    struct acpi_processor_power *power = processor_powers[smp_processor_id()];
     struct acpi_processor_cx *cx = NULL;
     int next_state;
     int sleep_ticks = 0;
     u32 t1, t2 = 0;
 
-    power = &processor_powers[smp_processor_id()];
-
     /*
      * Interrupts must be disabled during bus mastering calculations and
      * for C2/C3 transitions.
@@ -215,7 +216,7 @@ static void acpi_processor_idle(void)
         return;
     }
 
-    next_state = cpuidle_current_governor->select(power);
+    next_state = power ? cpuidle_current_governor->select(power) : -1;
     if ( next_state > 0 )
     {
         cx = &power->states[next_state];
@@ -670,7 +671,15 @@ long set_cx_pminfo(uint32_t cpu, struct 
         return -EFAULT;
     }
 
-    acpi_power = &processor_powers[cpu_id];
+    acpi_power = processor_powers[cpu_id];
+    if ( !acpi_power )
+    {
+        acpi_power = xmalloc(struct acpi_processor_power);
+        if ( !acpi_power )
+            return -ENOMEM;
+        memset(acpi_power, 0, sizeof(*acpi_power));
+        processor_powers[cpu_id] = acpi_power;
+    }
 
     init_cx_pminfo(acpi_power);
 
@@ -708,18 +717,25 @@ long set_cx_pminfo(uint32_t cpu, struct 
 
 uint32_t pmstat_get_cx_nr(uint32_t cpuid)
 {
-    return processor_powers[cpuid].count;
+    return processor_powers[cpuid] ? processor_powers[cpuid]->count : 0;
 }
 
 int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
 {
-    struct acpi_processor_power *power = &processor_powers[cpuid];
+    const struct acpi_processor_power *power = processor_powers[cpuid];
     struct vcpu *v = idle_vcpu[cpuid];
     uint64_t usage;
     int i;
 
+    if ( unlikely(!power) )
+    {
+        stat->last = 0;
+        stat->nr = 0;
+        stat->idle_time = 0;
+        return 0;
+    }
     stat->last = (power->last_state) ? power->last_state->type : 0;
-    stat->nr = processor_powers[cpuid].count;
+    stat->nr = power->count;
     stat->idle_time = v->runstate.time[RUNSTATE_running];
     if ( v->is_running )
         stat->idle_time += NOW() - v->runstate.state_entry_time;
Index: 2008-09-19/xen/arch/x86/acpi/pmstat.c
===================================================================
--- 2008-09-19.orig/xen/arch/x86/acpi/pmstat.c  2008-09-19 14:28:30.000000000 +0200
+++ 2008-09-19/xen/arch/x86/acpi/pmstat.c       2008-09-19 13:36:55.000000000 +0200
@@ -40,7 +40,7 @@
 #include <public/sysctl.h>
 #include <acpi/cpufreq/cpufreq.h>
 
-struct pm_px px_statistic_data[NR_CPUS];
+struct pm_px *__read_mostly px_statistic_data[NR_CPUS];
 
 extern uint32_t pmstat_get_cx_nr(uint32_t cpuid);
 extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
@@ -49,15 +49,14 @@ extern int pmstat_reset_cx_stat(uint32_t
 int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
 {
     int ret = 0;
-    struct pm_px *pxpt = &px_statistic_data[op->cpuid];
-    struct processor_pminfo *pmpt = &processor_pminfo[op->cpuid];
+    const struct processor_pminfo *pmpt = processor_pminfo[op->cpuid];
 
     /* to protect the case when Px was not controlled by xen */
-    if ( (!(pmpt->perf.init & XEN_PX_INIT)) && 
+    if ( (!pmpt || !(pmpt->perf.init & XEN_PX_INIT)) &&
         (op->type & PMSTAT_CATEGORY_MASK) == PMSTAT_PX )
         return -EINVAL;
 
-    if ( !cpu_online(op->cpuid) )
+    if ( op->cpuid >= NR_CPUS || !cpu_online(op->cpuid) )
         return -EINVAL;
 
     switch( op->type )
@@ -73,6 +72,10 @@ int do_get_pm_info(struct xen_sysctl_get
         uint64_t now, ct;
         uint64_t total_idle_ns;
         uint64_t tmp_idle_ns;
+        struct pm_px *pxpt = px_statistic_data[op->cpuid];
+
+        if ( !pxpt )
+            return -ENODATA;
 
         total_idle_ns = get_cpu_idle_time(op->cpuid);
         tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
Index: 2008-09-19/xen/arch/x86/platform_hypercall.c
===================================================================
--- 2008-09-19.orig/xen/arch/x86/platform_hypercall.c   2008-09-19 14:28:30.000000000 +0200
+++ 2008-09-19/xen/arch/x86/platform_hypercall.c        2008-09-19 13:36:55.000000000 +0200
@@ -380,8 +380,19 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
                 ret = -EINVAL;
                 break;
             }
-            pmpt = &processor_pminfo[cpuid];
-            pxpt = &processor_pminfo[cpuid].perf;
+            pmpt = processor_pminfo[cpuid];
+            if ( !pmpt )
+            {
+                pmpt = xmalloc(struct processor_pminfo);
+                if ( !pmpt )
+                {
+                    ret = -ENOMEM;
+                    break;
+                }
+                memset(pmpt, 0, sizeof(*pmpt));
+                processor_pminfo[cpuid] = pmpt;
+            }
+            pxpt = &pmpt->perf;
             pmpt->acpi_id = xenpmpt->id;
             pmpt->id = cpuid;
 
Index: 2008-09-19/xen/include/acpi/cpufreq/processor_perf.h
===================================================================
--- 2008-09-19.orig/xen/include/acpi/cpufreq/processor_perf.h   2008-09-19 14:28:30.000000000 +0200
+++ 2008-09-19/xen/include/acpi/cpufreq/processor_perf.h        2008-09-19 13:36:55.000000000 +0200
@@ -41,7 +41,7 @@ struct processor_pminfo {
     struct processor_performance    perf;
 };
 
-extern struct processor_pminfo processor_pminfo[NR_CPUS];
+extern struct processor_pminfo *processor_pminfo[NR_CPUS];
 
 struct px_stat {
     uint8_t total;        /* total Px states */
@@ -58,6 +58,6 @@ struct pm_px {
     uint64_t prev_idle_wall;
 };
 
-extern struct pm_px px_statistic_data[NR_CPUS];
+extern struct pm_px *px_statistic_data[NR_CPUS];
 
 #endif /* __XEN_PROCESSOR_PM_H__ */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
