WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH][pvops_dom0] Fix dom0 panic when physical CPU number

To: Jeremy Fitzhardinge <jeremy@xxxxxxxx>
Subject: [Xen-devel] [PATCH][pvops_dom0] Fix dom0 panic when physical CPU number is larger than NR_CPUS
From: "Yu, Ke" <ke.yu@xxxxxxxxx>
Date: Thu, 14 Jan 2010 14:33:54 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Wed, 13 Jan 2010 22:35:12 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcqU44rCZTqnoPy4SfusG0yEoJeVmw==
Thread-topic: [PATCH][pvops_dom0] Fix dom0 panic when physical CPU number is larger than NR_CPUS
    Fix dom0 panic when physical CPU number is larger than NR_CPUS
    
    When the number of physical CPUs is larger than NR_CPUS, there is a panic
in the Xen ACPI processor handling routine:
    [    1.103571] BUG: unable to handle kernel NULL pointer dereference at 
0000000000000003
    [    1.104545] IP: [<ffffffff8126c72f>] xen_cx_notifier+0xbf/0x1a6
    ...
    [    1.104545] Call Trace:
    [    1.104545]  [<ffffffff8126c84f>] processor_cntl_xen_notify+0x39/0x92
    [    1.104545]  [<ffffffff81563957>] 
xen_acpi_processor_power_init+0x7f/0x127
    [    1.104545]  [<ffffffff815635b2>] xen_acpi_processor_start+0x2c6/0x3ed
    [    1.104545]  [<ffffffff8102e13f>] ? xen_restore_fl_direct_end+0x0/0x1
    [    1.104545]  [<ffffffff810f2b07>] ? raw_local_irq_restore+0x19/0x1b
    [    1.104545]  [<ffffffff81226f46>] acpi_start_single_object+0x2a/0x54
    [    1.104545]  [<ffffffff812271a0>] acpi_device_probe+0x8f/0x154
    [    1.104545]  [<ffffffff812cc7c5>] driver_probe_device+0xb2/0x136
    [    1.104545]  [<ffffffff812cc89d>] __driver_attach+0x54/0x77
    [    1.104545]  [<ffffffff812cc849>] ? __driver_attach+0x0/0x77
    [    1.104545]  [<ffffffff812cc849>] ? __driver_attach+0x0/0x77
    [    1.104545]  [<ffffffff812cbe54>] bus_for_each_dev+0x49/0x78
    [    1.104545]  [<ffffffff812cc625>] driver_attach+0x1c/0x1e
    [    1.104545]  [<ffffffff812cb963>] bus_add_driver+0xba/0x21f
    [    1.104545]  [<ffffffff812ccc02>] driver_register+0x9e/0x115
    [    1.104545]  [<ffffffff81228997>] acpi_bus_register_driver+0x3e/0x43
    [    1.104545]  [<ffffffff8194d413>] acpi_processor_init+0xbc/0x12f
    [    1.104545]  [<ffffffff81228997>] ? acpi_bus_register_driver+0x3e/0x43
    [    1.104545]  [<ffffffff8194d357>] ? acpi_processor_init+0x0/0x12f
    
    The root cause is that one Xen ACPI processor array uses NR_CPUS as its
maximum index, while other code references this array using the physical ACPI
ID as the index. On certain large systems, the ACPI ID may easily exceed the
default NR_CPUS value of 8.
    
    From a logical point of view, NR_CPUS is for vCPUs, while the ACPI ID is
for physical CPUs, so it is not reasonable to mix them. This patch therefore
replaces NR_CPUS with XEN_MAX_ACPI_ID to unify the usage. With this patch, the
kernel panic disappears.
    
    Signed-off-by: Yu Ke <ke.yu@xxxxxxxxx>

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index e821709..91df512 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -1330,9 +1330,7 @@ static int xen_acpi_processor_get_info(struct acpi_device 
*device)
        return 0;
 }
 
-#define MAX_ACPI_ID 255
-
-static struct acpi_device *processor_device_array[MAX_ACPI_ID + 1];
+static struct acpi_device *processor_device_array[XEN_MAX_ACPI_ID + 1];
 
 static int __cpuinit xen_acpi_processor_start(struct acpi_device *device)
 {
@@ -1347,14 +1345,13 @@ static int __cpuinit xen_acpi_processor_start(struct 
acpi_device *device)
                return 0;
        }
 
-       if (pr->acpi_id > MAX_ACPI_ID)
-               return 0;
        /*
         * Buggy BIOS check
         * ACPI id of processors can be reported wrongly by the BIOS.
         * Don't trust it blindly
         */
-       if (processor_device_array[pr->acpi_id] != NULL &&
+       if (pr->acpi_id > XEN_MAX_ACPI_ID ||
+                       processor_device_array[pr->acpi_id] != NULL &&
                processor_device_array[pr->acpi_id] != device) {
                printk(KERN_WARNING "BIOS reported wrong ACPI id "
                                "for the processor\n");
diff --git a/drivers/xen/acpi_processor.c b/drivers/xen/acpi_processor.c
index f2f59cd..77be04b 100644
--- a/drivers/xen/acpi_processor.c
+++ b/drivers/xen/acpi_processor.c
@@ -43,14 +43,14 @@ static struct processor_cntl_xen_ops xen_ops = {
        .hotplug                = xen_hotplug_notifier,
 };
 
-static struct acpi_power_register *power_registers[NR_CPUS];
+static struct acpi_power_register *power_registers[XEN_MAX_ACPI_ID + 1];
 
 int processor_cntl_xen_power_cache(int cpu, int cx,
                struct acpi_power_register *reg)
 {
        struct acpi_power_register *buf;
 
-       if (cpu < 0 || cpu >= NR_CPUS ||
+       if (cpu < 0 || cpu > XEN_MAX_ACPI_ID ||
                        cx < 1 || cx > ACPI_PROCESSOR_MAX_POWER) {
                return -EINVAL;
        }
@@ -201,6 +201,12 @@ static int xen_cx_notifier(struct acpi_processor *pr, int 
action)
        if (action == PROCESSOR_PM_CHANGE)
                return -EINVAL;
 
+       if (power_registers[pr->acpi_id] == NULL) {
+               printk(KERN_WARNING "No C state info for acpi processor %d\n",
+                               pr->acpi_id);
+               return -EINVAL;
+       }
+
        /* Convert to Xen defined structure and hypercall */
        buf = kzalloc(pr->power.count * sizeof(struct xen_processor_cx),
                        GFP_KERNEL);
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
index 24f0bfb..a738725 100644
--- a/include/xen/acpi.h
+++ b/include/xen/acpi.h
@@ -37,6 +37,8 @@ int acpi_notify_hypervisor_state(u8 sleep_state,
 #define PM_TYPE_THR            2
 #define PM_TYPE_MAX            3
 
+#define XEN_MAX_ACPI_ID 255
+
 /* Processor hotplug events */
 #define HOTPLUG_TYPE_ADD       0
 #define HOTPLUG_TYPE_REMOVE    1

Attachment: acpi_id_overflow.patch
Description: acpi_id_overflow.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH][pvops_dom0] Fix dom0 panic when physical CPU number is larger than NR_CPUS, Yu, Ke <=