WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 2/2] Add physical CPU hotplug support to Xen hypervisor

To: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>, Jeremy Fitzhardinge <jeremy@xxxxxxxx>, Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 2/2] Add physical CPU hotplug support to Xen hypervisor
From: "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx>
Date: Thu, 24 Sep 2009 23:31:50 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc:
Delivery-date: Thu, 24 Sep 2009 08:33:50 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: Aco9LCLXqkEv37MGSVGuL8zUv16R4g==
Thread-topic: [PATCH 2/2] Add physical CPU hotplug support to Xen hypervisor
This patch adds physical CPU hotplug support to the Xen hypervisor.

After a hypercall from dom0, Xen will allocate a logical CPU id for the new CPU
and mark it present. Later, dom0 can online the new CPUs.

The patch includes the following related changes to achieve this purpose:
a) Add the hypercall for CPU add/remove.
b) As in the upstream kernel, use generic_processor_info() for both the MP
table and the ACPI table; originally the ACPI table path called
MP_processor_info().
c) Currently, when allocating idle vcpus, it is assumed that they are
contiguous. However, in the CPU hotplug case the user may online CPU X+1 and
then CPU X, so we change the way a domain's vcpus are linked through
next_in_list.
d) Change some __init to __devinit.
e) We also add logical CPU online/offline (O*L) to the platform hypercall,
since dom0 kernel current sysctl hypercall.

 arch/x86/acpi/boot.c                     |    1
 arch/x86/mpparse.c                       |  100 ++++-----------
 arch/x86/numa.c                          |   10 -
 arch/x86/platform_hypercall.c            |  107 ++++++++++++++++
 arch/x86/setup.c                         |    3
 arch/x86/smpboot.c                       |  200 +++++++++++++++++++++++++++++--
 arch/x86/srat.c                          |   23 +--
 arch/x86/x86_64/platform_hypercall.c     |    5
 common/domain.c                          |   10 +
 common/event_channel.c                   |    2
 include/asm-x86/acpi.h                   |    1
 include/asm-x86/mach-generic/mach_apic.h |   11 -
 include/asm-x86/mpspec.h                 |    4
 include/asm-x86/numa.h                   |    6
 include/asm-x86/smp.h                    |    6
 include/public/platform.h                |   56 ++++++++
 include/public/xen.h                     |    1

Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>


diff -r 8b1567102cf3 xen/arch/x86/acpi/boot.c
--- a/xen/arch/x86/acpi/boot.c  Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/acpi/boot.c  Wed Sep 23 02:50:50 2009 +0800
@@ -78,7 +78,6 @@ u8 acpi_enable_value, acpi_disable_value
 #warning ACPI uses CMPXCHG, i486 and later hardware
 #endif

-#define MAX_MADT_ENTRIES       256
 u8 x86_acpiid_to_apicid[MAX_MADT_ENTRIES] =
     {[0 ... MAX_MADT_ENTRIES - 1] = 0xff };
 EXPORT_SYMBOL(x86_acpiid_to_apicid);
diff -r 8b1567102cf3 xen/arch/x86/mpparse.c
--- a/xen/arch/x86/mpparse.c    Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/mpparse.c    Thu Sep 24 00:08:55 2009 +0800
@@ -35,7 +35,7 @@

 /* Have we found an MP table */
 int smp_found_config;
-unsigned int __initdata maxcpus = NR_CPUS;
+unsigned int __devinitdata maxcpus = NR_CPUS;

 /*
  * Various Linux-internal data structures created from the
@@ -68,12 +68,10 @@ unsigned int def_to_bigsmp = 0;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
 /* Internal processor count */
-static unsigned int __devinitdata num_processors;
+unsigned int __devinitdata num_processors;

 /* Bitmask of physically existing CPUs */
 physid_mask_t phys_cpu_present_map;
-
-u32 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

 /*
  * Intel MP BIOS table parsing routines:
@@ -106,7 +104,6 @@ static void __devinit MP_processor_info
 static void __devinit MP_processor_info (struct mpc_config_processor *m)
 {
        int ver, apicid;
-       physid_mask_t phys_cpu;

        if (!(m->mpc_cpuflag & CPU_ENABLED))
                return;
@@ -167,44 +164,7 @@ static void __devinit MP_processor_info
        }

        ver = m->mpc_apicver;
-
-       /*
-        * Validate version
-        */
-       if (ver == 0x0) {
-               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
-                               "fixing up to 0x10. (tell your hw vendor)\n",
-                               m->mpc_apicid);
-               ver = 0x10;
-       }
-       apic_version[m->mpc_apicid] = ver;
-
-       phys_cpu = apicid_to_cpu_present(apicid);
-       physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
-
-       if (num_processors >= NR_CPUS) {
-               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
-                       "  Processor ignored.\n", NR_CPUS);
-               return;
-       }
-
-       if (num_processors >= maxcpus) {
-               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
-                       " Processor ignored.\n", maxcpus);
-               return;
-       }
-
-       cpu_set(num_processors, cpu_possible_map);
-       num_processors++;
-
-       if (num_processors > 8) {
-               /*
-                * No need for processor or APIC checks: physical delivery
-                * (bigsmp) mode should always work.
-                */
-               def_to_bigsmp = 1;
-       }
-       bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+    generic_processor_info(apicid, ver);
 }

 static void __init MP_bus_info (struct mpc_config_bus *m)
@@ -714,9 +674,6 @@ void __init get_smp_config (void)
                BUG();

        printk(KERN_INFO "Processors: %d\n", num_processors);
-       /*
-        * Only use the first configuration found.
-        */
 }

 static int __init smp_scan_config (unsigned long base, unsigned long length)
@@ -827,35 +784,34 @@ void __init mp_register_lapic_address (
        Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
 }

-
-void __devinit mp_register_lapic (
-       u8                      id,
+int __devinit mp_register_lapic (
+       uint32_t        apic_id,
        u8                      enabled)
 {
-       struct mpc_config_processor processor;
-       int                     boot_cpu = 0;
-
-       if (MAX_APICS - id <= 0) {
+    if (!enabled)
+        return -1;
+
+       if (apic_id >= MAX_APICS) {
                printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
-                       id, MAX_APICS);
-               return;
-       }
-
-       if (id == boot_cpu_physical_apicid)
-               boot_cpu = 1;
-
-       processor.mpc_type = MP_PROCESSOR;
-       processor.mpc_apicid = id;
-       processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
-       processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
-       processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
-       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
-               (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
-       processor.mpc_reserved[0] = 0;
-       processor.mpc_reserved[1] = 0;
-
-       MP_processor_info(&processor);
+                       apic_id, MAX_APICS);
+               return -1;
+       }
+
+    return generic_processor_info(apic_id,
+            GET_APIC_VERSION(apic_read(APIC_LVR)));
+}
+
+void mp_unregister_lapic(uint32_t apic_id, uint32_t cpu)
+{
+       physid_clear(apic_id, phys_cpu_present_map);
+
+    if (x86_cpu_to_apicid[cpu] != apic_id)
+        return;
+
+    x86_cpu_to_apicid[cpu] = -1U;
+       cpu_clear(cpu, cpu_possible_map);
+       cpu_clear(cpu, cpu_present_map);
+    physid_clear(apic_id, phys_cpu_present_map);
 }

 #ifdef CONFIG_X86_IO_APIC
diff -r 8b1567102cf3 xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c       Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/numa.c       Wed Sep 23 02:50:50 2009 +0800
@@ -42,9 +42,9 @@ nodemask_t node_online_map = { { [0] = 1
 nodemask_t node_online_map = { { [0] = 1UL } };

 /* Default NUMA to off for now. acpi=on required to enable it. */
-int numa_off __initdata = 1;
-
-int acpi_numa __initdata;
+int numa_off __devinitdata = 1;
+
+int acpi_numa __devinitdata;

 /*
  * Given a shift value, try to populate memnodemap[]
@@ -53,7 +53,7 @@ int acpi_numa __initdata;
  * 0 if memnodmap[] too small (of shift too small)
  * -1 if node overlap or lost ram (shift too big)
  */
-static int __init
+static int __devinit
 populate_memnodemap(const struct node *nodes, int numnodes, int shift)
 {
        int i;
@@ -259,7 +259,7 @@ static __init int numa_setup(char *opt)
  * prior to this call, and this initialization is good enough
  * for the fake NUMA cases.
  */
-void __init init_cpu_to_node(void)
+void __devinit init_cpu_to_node(void)
 {
        int i;
        for (i = 0; i < NR_CPUS; i++) {
diff -r 8b1567102cf3 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/platform_hypercall.c Thu Sep 24 00:06:42 2009 +0800
@@ -39,6 +39,8 @@ DEFINE_SPINLOCK(xenpf_lock);
 # define copy_to_compat copy_to_guest
 # undef guest_from_compat_handle
 # define guest_from_compat_handle(x,y) ((x)=(y))
+# undef compat_handle_is_null
+# define compat_handle_is_null guest_handle_is_null
 #else
 extern spinlock_t xenpf_lock;
 #endif
@@ -51,6 +53,12 @@ static long cpu_frequency_change_helper(
 static long cpu_frequency_change_helper(void *data)
 {
     return cpu_frequency_change(this_cpu(freq));
+}
+
+static long cpu_down_helper(void *data)
+{
+    int cpu = (unsigned long)data;
+    return cpu_down(cpu);
 }

 ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
@@ -385,7 +393,104 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
             break;
         }
         break;
-
+    case XENPF_get_cpuinfo:
+        {
+            int ncpu = num_present_cpus(), cpu, i;
+            struct xen_physical_cpuinfo *pcpus, *pcpu;
+            struct xenpf_pcpu_info *g_info;
+
+            ret = -EINVAL;
+            g_info = &op->u.pcpu_info;
+
+            if (op->u.pcpu_info.ncpus <= 0)
+                goto done;
+
+                       if ( compat_handle_is_null(g_info->info) )
+            {
+                ret = -EINVAL;
+                goto out;
+            }
+
+            ncpu = min(ncpu, (int)op->u.pcpu_info.ncpus);
+            pcpus = xmalloc_array(struct xen_physical_cpuinfo, ncpu);
+            ret = -ENOMEM;
+            if (pcpus == NULL)
+                goto out;
+
+            memset(pcpus, 0, sizeof(struct xen_physical_cpuinfo) * ncpu);
+
+            pcpu = pcpus;
+
+            i = 0;
+            spin_lock(&cpu_add_remove_lock);
+            for_each_present_cpu(cpu)
+            {
+                pcpu->xen_cpuid = cpu;
+                pcpu->apic_id = x86_cpu_to_apicid[cpu];
+                ASSERT(pcpu->apic_id != BAD_APICID);
+                if (cpu_online(cpu))
+                    pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+                pcpu++;
+                if (i++ == ncpu)
+                    break;
+            }
+            spin_unlock(&cpu_add_remove_lock);
+            ret = -EFAULT;
+            if( copy_to_compat(g_info->info, pcpus, ncpu))
+                goto out;
+            xfree(pcpus);
+done:
+            op->u.pcpu_info.max_cpus = num_possible_cpus();
+            op->u.pcpu_info.ncpus = ncpu;
+            ret = copy_to_guest(u_xenpf_op, op, 1) ? -EFAULT : 0;
+        }
+        break;
+    case XENPF_resource_hotplug:
+    {
+        int cpu;
+
+        switch ( op->u.resource.sub_cmd)
+        {
+        case XEN_CPU_add:
+            ret = cpu_add(op->u.resource.u.sadd.apic_id,
+                             op->u.resource.u.sadd.acpi_id,
+                             op->u.resource.u.sadd.pxm);
+        break;
+        case XEN_CPU_remove:
+            ret = cpu_remove(op->u.resource.u.sremove.apic_id);
+        break;
+        case XEN_CPU_online:
+            cpu = op->u.resource.u.cpu_ol.cpuid;
+            if (!cpu_present(cpu))
+            {
+                ret = -EINVAL;
+                break;
+            }
+            else if (cpu_online(cpu))
+            {
+                ret = 0;
+                break;
+            }
+
+            ret = cpu_up(cpu);
+            break;
+        case XEN_CPU_offline:
+            cpu = op->u.resource.u.cpu_ol.cpuid;
+            if (!cpu_present(cpu))
+            {
+                ret = -EINVAL;
+                break;
+            } else if (!cpu_online(cpu))
+            {
+                ret = 0;
+                break;
+            }
+            ret = continue_hypercall_on_cpu(
+                0, cpu_down_helper, (void *)(unsigned long)cpu);
+            break;
+        }
+    }
+    break;
     default:
         ret = -ENOSYS;
         break;
diff -r 8b1567102cf3 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c      Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/setup.c      Wed Sep 23 20:10:32 2009 +0800
@@ -254,7 +254,7 @@ static void __init init_idle_domain(void
     setup_idle_pagetable();
 }

-static void __init srat_detect_node(int cpu)
+void __devinit srat_detect_node(int cpu)
 {
     unsigned node;
     u32 apicid = x86_cpu_to_apicid[cpu];
@@ -982,6 +982,7 @@ void __init __start_xen(unsigned long mb
     if ( smp_found_config )
         get_smp_config();

+    prefill_possible_map();
 #ifdef CONFIG_X86_64
     /* Low mappings were only needed for some BIOS table parsing. */
     zap_low_mappings();
diff -r 8b1567102cf3 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c    Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/smpboot.c    Thu Sep 24 00:11:23 2009 +0800
@@ -44,6 +44,7 @@
 #include <xen/softirq.h>
 #include <xen/serial.h>
 #include <xen/numa.h>
+#include <xen/event.h>
 #include <asm/current.h>
 #include <asm/mc146818rtc.h>
 #include <asm/desc.h>
@@ -104,7 +105,7 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
 DEFINE_PER_CPU(int, cpu_state) = { 0 };

 static void *stack_base[NR_CPUS];
-static DEFINE_SPINLOCK(cpu_add_remove_lock);
+DEFINE_SPINLOCK(cpu_add_remove_lock);

 /*
  * The bootstrap kernel entry code has set these up. Save them for
@@ -795,6 +796,7 @@ static inline int alloc_cpu_id(void)
 {
        cpumask_t       tmp_map;
        int cpu;
+
        cpus_complement(tmp_map, cpu_present_map);
        cpu = first_cpu(tmp_map);
        if (cpu >= NR_CPUS)
@@ -1021,7 +1023,7 @@ EXPORT_SYMBOL(xquad_portio);

 static void __init smp_boot_cpus(unsigned int max_cpus)
 {
-       int apicid, cpu, bit, kicked;
+       int apicid, cpu, kicked;
 #ifdef BOGOMIPS
        unsigned long bogosum = 0;
 #endif
@@ -1105,20 +1107,22 @@ static void __init smp_boot_cpus(unsigne
        Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));

        kicked = 1;
-       for (bit = 0; kicked < NR_CPUS && bit < NR_CPUS; bit++) {
-               apicid = cpu_present_to_apicid(bit);
+    for_each_present_cpu ( cpu )
+    {
+               apicid = x86_cpu_to_apicid[cpu];
+
                /*
                 * Don't even attempt to start the boot CPU!
                 */
                if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
                        continue;

-               if (!check_apicid_present(apicid))
-                       continue;
+               BUG_ON(!check_apicid_present(apicid));
+
                if (max_cpus <= cpucount+1)
                        continue;

-               if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
+               if ( do_boot_cpu(apicid, cpu))
                        printk("CPU #%d not responding - cannot use it.\n",
                                                                apicid);
                else
@@ -1203,6 +1207,59 @@ void __init smp_prepare_cpus(unsigned in
        mtrr_aps_sync_begin();
 }

+int generic_processor_info(uint32_t apicid, int version)
+{
+    int cpu;
+
+       /*
+        * Validate version
+        */
+       if (version == 0x0) {
+               printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+                               "fixing up to 0x10. (tell your hw vendor)\n",
+                               apicid);
+               version = 0x10;
+       }
+
+       if (num_processors >= NR_CPUS) {
+               printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+                       "  Processor ignored.\n", NR_CPUS);
+               return -1;
+       }
+
+       if (num_processors >= maxcpus) {
+               printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+                       " Processor ignored.\n", maxcpus);
+               return -1;
+       }
+
+       num_processors++;
+
+       if (apicid == boot_cpu_physical_apicid)
+               return 0;
+
+       apic_version[apicid] = version;
+
+       cpu = alloc_cpu_id();
+       BUG_ON(cpu <= 0);
+
+       physid_set(apicid, phys_cpu_present_map);
+
+       x86_cpu_to_apicid[cpu] = apicid;
+       cpu_set(cpu, cpu_possible_map);
+       cpu_set(cpu, cpu_present_map);
+
+       if (num_processors > 8) {
+               /*
+                * No need for processor or APIC checks: physical delivery
+                * (bigsmp) mode should always work.
+                */
+               def_to_bigsmp = 1;
+       }
+
+    return cpu;
+}
+
 void __devinit smp_prepare_boot_cpu(void)
 {
        cpu_set(smp_processor_id(), cpu_online_map);
@@ -1213,6 +1270,15 @@ void __devinit smp_prepare_boot_cpu(void
 }

 #ifdef CONFIG_HOTPLUG_CPU
+int prefill_possible_map(void)
+{
+       int i;
+
+       for (i = 0; i < NR_CPUS; i++)
+               cpu_set(i, cpu_possible_map);
+       return 0;
+}
+
 static void
 remove_siblinginfo(int cpu)
 {
@@ -1337,6 +1403,8 @@ int cpu_down(unsigned int cpu)

 out:
        spin_unlock(&cpu_add_remove_lock);
+    if (!err)
+        send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
        return err;
 }

@@ -1357,6 +1425,8 @@ int cpu_up(unsigned int cpu)

 out:
        spin_unlock(&cpu_add_remove_lock);
+    if (!err)
+        send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
        return err;
 }

@@ -1410,17 +1480,133 @@ void enable_nonboot_cpus(void)
         */
        smpboot_restore_warm_reset_vector();
 }
+
+int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm)
+{
+    int cpu = -1;
+
+    dprintk(XENLOG_DEBUG, "cpu_add apic_id %x acpi_id %x pxm %x\n",
+             apic_id, acpi_id, pxm);
+
+    if ( acpi_id > MAX_MADT_ENTRIES || apic_id > MAX_APICS || pxm > 256 )
+        return -EINVAL;
+
+    /* Detect if the cpu has been added before */
+    if ( x86_acpiid_to_apicid[acpi_id] != 0xff)
+    {
+        if (x86_acpiid_to_apicid[acpi_id] != apic_id)
+            return -EINVAL;
+        else
+            return -EEXIST;
+    }
+
+    if ( physid_isset(apic_id, phys_cpu_present_map) )
+        return -EEXIST;
+
+       spin_lock(&cpu_add_remove_lock);
+
+    x86_acpiid_to_apicid[acpi_id] = apic_id;
+
+    cpu = mp_register_lapic(apic_id, 1);
+
+    if ( !srat_disabled() )
+    {
+        int node;
+
+        node = setup_node(pxm);
+        if (node < 0)
+        {
+            dprintk(XENLOG_WARNING, "Setup no failed for pxm %x\n", pxm);
+            goto failed;
+        }
+        apicid_to_node[apic_id] = node;
+    }
+
+    srat_detect_node(cpu);
+    numa_add_cpu(cpu);
+       spin_unlock(&cpu_add_remove_lock);
+    dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu);
+    return cpu;
+failed:
+    dprintk(XENLOG_WARNING, "Failed to add cpu, apic_id %x acpi_id %x\n",
+            apic_id, acpi_id);
+    if (cpu > 0)
+        mp_unregister_lapic(apic_id, cpu);
+    x86_acpiid_to_apicid[acpi_id] = 0xff;
+    apicid_to_node[apic_id] = NUMA_NO_NODE;
+       spin_unlock(&cpu_add_remove_lock);
+
+    return cpu;
+}
+
+int cpu_remove(uint32_t apic_id)
+{
+    int cpu = -1, i;
+
+    if (!physid_isset(apic_id, phys_cpu_present_map))
+        return -EINVAL;
+
+       spin_lock(&cpu_add_remove_lock);
+
+    for_each_possible_cpu(cpu)
+    {
+        if (x86_cpu_to_apicid[cpu] == apic_id)
+            break;
+    }
+
+    if ( (cpu == -1) || !cpu_present(cpu) )
+    {
+        spin_unlock(&cpu_add_remove_lock);
+        return -ENXIO;
+    }
+
+    if (cpu_online(cpu))
+    {
+        spin_unlock(&cpu_add_remove_lock);
+        dprintk(XENLOG_WARNING, "Try to remove onlin cpu %x\n", cpu);
+        return -EBUSY;
+    }
+
+    mp_unregister_lapic(apic_id, cpu);
+    for (i = 0; i < MAX_MADT_ENTRIES; i++)
+    {
+        if (x86_acpiid_to_apicid[i] == apic_id)
+            x86_acpiid_to_apicid[i] = 0xff;
+    }
+    physid_clear(apic_id, phys_cpu_present_map);
+    apicid_to_node[apic_id] = NUMA_NO_NODE;
+       spin_unlock(&cpu_add_remove_lock);
+
+    return 0;
+}
+
 #else /* ... !CONFIG_HOTPLUG_CPU */
 int __cpu_disable(void)
 {
        return -ENOSYS;
 }

+int prefill_possible_map()
+{
+    return -ENOSYS;
+}
+
 void __cpu_die(unsigned int cpu)
 {
        /* We said "no" in __cpu_disable */
        BUG();
 }
+
+int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm)
+{
+    return -ENOSYS;
+}
+
+int cpu_remove(uint32_t apic_id)
+{
+    return -ENOSYS;
+}
+
 #endif /* CONFIG_HOTPLUG_CPU */

 int __devinit __cpu_up(unsigned int cpu)
diff -r 8b1567102cf3 xen/arch/x86/srat.c
--- a/xen/arch/x86/srat.c       Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/srat.c       Wed Sep 23 02:50:50 2009 +0800
@@ -33,15 +33,7 @@ static u8 pxm2node[256] = { [0 ... 255]

 static int node_to_pxm(int n);

-int pxm_to_node(int pxm)
-{
-       if ((unsigned)pxm >= 256)
-               return -1;
-       /* Extend 0xff to (int)-1 */
-       return (signed char)pxm2node[pxm];
-}
-
-static __init int setup_node(int pxm)
+__devinit int setup_node(int pxm)
 {
        unsigned node = pxm2node[pxm];
        if (node == 0xff) {
@@ -52,6 +44,14 @@ static __init int setup_node(int pxm)
                pxm2node[pxm] = node;
        }
        return pxm2node[pxm];
+}
+
+int pxm_to_node(int pxm)
+{
+       if ((unsigned)pxm >= 256)
+               return -1;
+       /* Extend 0xff to (int)-1 */
+       return (signed char)pxm2node[pxm];
 }

 static __init int conflicting_nodes(u64 start, u64 end)
@@ -91,11 +91,6 @@ static __init void bad_srat(void)
        acpi_numa = -1;
        for (i = 0; i < MAX_LOCAL_APIC; i++)
                apicid_to_node[i] = NUMA_NO_NODE;
-}
-
-static __init inline int srat_disabled(void)
-{
-       return numa_off || acpi_numa < 0;
 }

 /*
diff -r 8b1567102cf3 xen/arch/x86/x86_64/platform_hypercall.c
--- a/xen/arch/x86/x86_64/platform_hypercall.c  Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/arch/x86/x86_64/platform_hypercall.c  Wed Sep 23 02:50:50 2009 +0800
@@ -23,6 +23,11 @@ DEFINE_XEN_GUEST_HANDLE(compat_platform_
 #define xen_processor_power_t   compat_processor_power_t
 #define set_cx_pminfo           compat_set_cx_pminfo

+#define xen_physical_cpuinfo compat_physical_cpuinfo
+#define xen_physical_cpuinfo_ compat_physical_cpuinfo_t
+#define xenpf_pcpu_info compat_pf_pcpu_info
+#define xenpf_pcpu_info_t compat_pf_pcpu_info_t
+
 #define xenpf_enter_acpi_sleep compat_pf_enter_acpi_sleep

 #define COMPAT
diff -r 8b1567102cf3 xen/common/domain.c
--- a/xen/common/domain.c       Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/common/domain.c       Wed Sep 23 02:50:50 2009 +0800
@@ -170,9 +170,17 @@ struct vcpu *alloc_vcpu(
         return NULL;
     }

+    domain_lock(d);
     d->vcpu[vcpu_id] = v;
     if ( vcpu_id != 0 )
-        d->vcpu[v->vcpu_id-1]->next_in_list = v;
+    {
+        struct vcpu *tmp = d->vcpu[0];
+
+        while (tmp->next_in_list)
+            tmp = tmp->next_in_list;
+        tmp->next_in_list = v;
+    }
+    domain_unlock(d);

     /* Must be called after making new vcpu visible to for_each_vcpu(). */
     vcpu_check_shutdown(v);
diff -r 8b1567102cf3 xen/common/event_channel.c
--- a/xen/common/event_channel.c        Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/common/event_channel.c        Thu Sep 24 02:29:07 2009 +0800
@@ -570,7 +570,7 @@ static int evtchn_set_pending(struct vcp
           vcpuid = find_next_bit(d->poll_mask, d->max_vcpus, vcpuid+1) )
     {
         v = d->vcpu[vcpuid];
-        if ( ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
+        if ( v && ((v->poll_evtchn <= 0) || (v->poll_evtchn == port)) &&
              test_and_clear_bit(vcpuid, d->poll_mask) )
         {
             v->poll_evtchn = 0;
diff -r 8b1567102cf3 xen/include/asm-x86/acpi.h
--- a/xen/include/asm-x86/acpi.h        Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/include/asm-x86/acpi.h        Wed Sep 23 02:50:50 2009 +0800
@@ -161,6 +161,7 @@ struct acpi_sleep_info {

 #endif /* CONFIG_ACPI_SLEEP */

+#define MAX_MADT_ENTRIES       256
 extern u8 x86_acpiid_to_apicid[];
 #define MAX_LOCAL_APIC 256

diff -r 8b1567102cf3 xen/include/asm-x86/mach-generic/mach_apic.h
--- a/xen/include/asm-x86/mach-generic/mach_apic.h      Tue Sep 22 14:19:38 
2009 +0100
+++ b/xen/include/asm-x86/mach-generic/mach_apic.h      Wed Sep 23 19:50:49 
2009 +0800
@@ -20,17 +20,6 @@ static inline void enable_apic_mode(void
 {
        /* Not needed for modern ES7000 which boot in Virtual Wire mode. */
        /*es7000_sw_apic();*/
-}
-
-#define apicid_to_node(apicid) ((int)apicid_to_node[(u32)apicid])
-
-extern u32 bios_cpu_apicid[];
-static inline int cpu_present_to_apicid(int mps_cpu)
-{
-       if (mps_cpu < NR_CPUS)
-               return (int)bios_cpu_apicid[mps_cpu];
-       else
-               return BAD_APICID;
 }

 static inline int mpc_apic_id(struct mpc_config_processor *m,
diff -r 8b1567102cf3 xen/include/asm-x86/mpspec.h
--- a/xen/include/asm-x86/mpspec.h      Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/include/asm-x86/mpspec.h      Wed Sep 23 02:50:50 2009 +0800
@@ -12,6 +12,7 @@ extern int mp_bus_id_to_pci_bus [MAX_MP_
 extern int mp_bus_id_to_pci_bus [MAX_MP_BUSSES];

 extern unsigned int def_to_bigsmp;
+extern unsigned int num_processors;
 extern unsigned int boot_cpu_physical_apicid;
 extern int smp_found_config;
 extern void find_smp_config (void);
@@ -28,7 +29,8 @@ extern int using_apic_timer;
 extern int using_apic_timer;

 #ifdef CONFIG_ACPI
-extern void mp_register_lapic (u8 id, u8 enabled);
+extern int mp_register_lapic (uint32_t id, u8 enabled);
+extern void mp_unregister_lapic (uint32_t apic_id, uint32_t cpu);
 extern void mp_register_lapic_address (u64 address);
 extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base);
 extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 
gsi);
diff -r 8b1567102cf3 xen/include/asm-x86/numa.h
--- a/xen/include/asm-x86/numa.h        Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/include/asm-x86/numa.h        Wed Sep 23 02:50:50 2009 +0800
@@ -30,7 +30,13 @@ extern void numa_init_array(void);
 extern void numa_init_array(void);
 extern int numa_off;

+static __devinit inline int srat_disabled(void)
+{
+       return numa_off || acpi_numa < 0;
+}
 extern void numa_set_node(int cpu, int node);
+extern int setup_node(int pxm);
+extern void srat_detect_node(int cpu);

 extern void setup_node_bootmem(int nodeid, u64 start, u64 end);
 extern unsigned char apicid_to_node[256];
diff -r 8b1567102cf3 xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/include/asm-x86/smp.h Thu Sep 24 00:09:58 2009 +0800
@@ -58,6 +58,7 @@ extern u32 cpu_2_logical_apicid[];
 #define CPU_DEAD       0x0004  /* CPU is dead */
 DECLARE_PER_CPU(int, cpu_state);

+extern spinlock_t(cpu_add_remove_lock);
 #ifdef CONFIG_HOTPLUG_CPU
 #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
 extern int cpu_down(unsigned int cpu);
@@ -66,11 +67,16 @@ extern void cpu_uninit(void);
 extern void cpu_uninit(void);
 extern void disable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
+extern int generic_processor_info(uint32_t apicid, int version);
 #else
 static inline int cpu_is_offline(int cpu) {return 0;}
 static inline void disable_nonboot_cpus(void) {}
 static inline void enable_nonboot_cpus(void) {}
 #endif
+extern int cpu_add(uint32_t apic_id, uint32_t acpi_id, uint32_t pxm);
+extern int prefill_possible_map(void);
+extern int cpu_remove(uint32_t apic_id);
+extern unsigned int maxcpus;

 /*
  * This function is needed by all SMP systems. It must _always_ be valid
diff -r 8b1567102cf3 xen/include/public/platform.h
--- a/xen/include/public/platform.h     Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/include/public/platform.h     Thu Sep 24 00:07:34 2009 +0800
@@ -312,6 +312,60 @@ typedef struct xenpf_set_processor_pminf
 typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);

+struct xenpf_hotadd_cpu
+{
+    uint32_t apic_id;
+    uint32_t acpi_id;
+    uint32_t pxm;
+};
+
+struct xenpf_hotremove_cpu
+{
+    uint32_t apic_id;
+};
+
+struct xenpf_cpu_ol
+{
+    uint32_t cpuid;
+};
+
+#define XENPF_resource_hotplug 55
+struct xenpf_resource_hotplug {
+    uint32_t sub_cmd;
+#define XEN_CPU_add      1
+#define XEN_CPU_remove   2
+#define XEN_CPU_online      3
+#define XEN_CPU_offline     4
+    union {
+        struct xenpf_hotadd_cpu sadd;
+        struct xenpf_hotremove_cpu sremove;
+        struct xenpf_cpu_ol   cpu_ol;
+    }u;
+};
+
+#define XENPF_get_cpuinfo 56
+struct xen_physical_cpuinfo {
+    uint32_t xen_cpuid;
+    uint32_t apic_id;
+#define XEN_PCPU_FLAGS_ONLINE 1
+    uint32_t flags;
+    uint32_t pad;
+};
+typedef struct xen_physical_cpuinfo xen_physical_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_physical_cpuinfo_t);
+
+struct xenpf_pcpu_info
+{
+    /* IN/OUT */
+    uint32_t ncpus;
+    /* OUT */
+    /* The possible CPU */
+    uint32_t max_cpus;
+    XEN_GUEST_HANDLE(xen_physical_cpuinfo_t) info;
+};
+typedef struct xenpf_pcpu_info xenpf_pcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_info_t);
+
 struct xen_platform_op {
     uint32_t cmd;
     uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -327,6 +381,8 @@ struct xen_platform_op {
         struct xenpf_change_freq       change_freq;
         struct xenpf_getidletime       getidletime;
         struct xenpf_set_processor_pminfo set_pminfo;
+        struct xenpf_resource_hotplug   resource;
+        struct xenpf_pcpu_info          pcpu_info;
         uint8_t                        pad[128];
     } u;
 };
diff -r 8b1567102cf3 xen/include/public/xen.h
--- a/xen/include/public/xen.h  Tue Sep 22 14:19:38 2009 +0100
+++ b/xen/include/public/xen.h  Wed Sep 23 02:50:50 2009 +0800
@@ -144,6 +144,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
 #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
 #define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
 #define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
+#define VIRQ_PCPU_STATE 9  /* G. (DOM0) PCPU state changed                   */

 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16

Attachment: xen_cpu_hotplug.patch
Description: xen_cpu_hotplug.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH 2/2] Add physical CPU hotplug support to Xen hypervisor, Jiang, Yunhong <=