| * Puthiyaparambil, Aravindh (aravindh.puthiyaparambil@xxxxxxxxxx) wrote:
> Is there any way for us to see an early copy of this patch? I think we
> need to come up with way for apic_xen to accommodate clustered apics too
> or there might be issues running on large systems.
Here's an update from the last copy I made (about one week old I think).
Many thanks to Xin who found my last bug so that 64-bit syscalls actually
worked!  I'd run this one quite successfully on dom0 (before refreshing to
a newer Xen snapshot, this actual patch is not more than compile tested).
I believe Xin is continuing on to work on domU (which conincidentally
has apic compilation issues), and may have a more up-to-date patch.
thanks,
-chris
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
@@ -43,7 +43,6 @@
 startup_64:
 ENTRY(_start)
         cld                
-       movq init_rsp(%rip),%rsp
        /* Copy the necessary stuff from xen_start_info structure. */
        movq  $xen_start_info_union,%rdi
        movq  $64,%rcx          /* sizeof (union xen_start_info_union) / sizeof 
(long) */
@@ -54,6 +53,7 @@ ENTRY(_start)
        cld
 #endif /* CONFIG_SMP */
 
+       movq init_rsp(%rip),%rsp
        /* zero EFLAGS after setting rsp */
        pushq $0
        popfq
@@ -140,6 +140,7 @@ ENTRY(cpu_gdt_table)
        .quad   0,0                     /* TSS */
        .quad   0,0                     /* LDT */
        .quad   0,0,0                   /* three TLS descriptors */ 
+       .quad   0                       /* unused now?   __KERNEL16_CS - 16bit 
PM for S3 wakeup. */
 
 gdt_end:       
        /* asm/segment.h:GDT_ENTRIES must match this */ 
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
@@ -25,10 +25,10 @@ obj-$(CONFIG_ACPI_BOOT)             += acpi/
 c-obj-$(CONFIG_X86_MSR)                += msr.o
 obj-$(CONFIG_MICROCODE)                += microcode.o
 obj-$(CONFIG_X86_CPUID)                += cpuid.o
-#obj-$(CONFIG_SMP)             += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP)              += smp.o smpboot.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
-obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o
+obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o genapic_xen.o
 c-obj-$(CONFIG_X86_IO_APIC)    += genapic.o genapic_cluster.o genapic_flat.o
 #obj-$(CONFIG_PM)              += suspend.o
 #obj-$(CONFIG_SOFTWARE_SUSPEND)        += suspend_asm.o
@@ -54,7 +54,7 @@ intel_cacheinfo-y             += ../../../i386/kern
 quirks-y                       += ../../i386/kernel/quirks.o
 
 c-link := init_task.o
-s-link := vsyscall.o 
+s-link := vsyscall.o
 
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst 
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
        @ln -fsn $(srctree)/arch/x86_64/kernel/$(notdir $@) $@
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
@@ -259,13 +259,13 @@ void __init cpu_init (void)
         * Initialize the per-CPU GDT with the boot GDT,
         * and set up the GDT descriptor:
         */
+#ifndef CONFIG_XEN
        if (cpu) {
                memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
        }       
 
        cpu_gdt_descr[cpu].size = GDT_SIZE;
        cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-#if 0
        asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
        asm volatile("lidt %0" :: "m" (idt_descr));
 #endif
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
@@ -599,6 +599,17 @@ static void __init print_memory_map(char
         }
 }
 
+void __init smp_alloc_memory(void)
+{
+       int cpu;
+
+       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+               cpu_gdt_descr[cpu].address = (unsigned long)
+                       alloc_bootmem_low_pages(PAGE_SIZE);
+               /* XXX free unused pages later */
+       }
+}
+
 void __init setup_arch(char **cmdline_p)
 {
        int i, j;
@@ -738,6 +749,9 @@ void __init setup_arch(char **cmdline_p)
                }
        }
 #endif
+#ifdef CONFIG_SMP
+       smp_alloc_memory();
+#endif
        paging_init();
 #ifdef CONFIG_X86_LOCAL_APIC
        /*
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
@@ -8,11 +8,14 @@
 #define sizeof_vcpu_shift              3
 
 #ifdef CONFIG_SMP
-#define preempt_disable(reg)   incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg)    decl threadinfo_preempt_count(reg)
+//#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
+//#define preempt_enable(reg)  decl threadinfo_preempt_count(reg)
+#define preempt_disable(reg)
+#define preempt_enable(reg)
 #define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp)                   ; \
                                movq %gs:pda_cpunumber,reg              ; \
-                               shl  $sizeof_vcpu_shift,reg             ; \
+                               shl  $32, reg                           ; \
+                               shr  $32-sizeof_vcpu_shift,reg          ; \
                                addq HYPERVISOR_shared_info,reg
 #define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp)                    ; \
 #define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
@@ -20,7 +20,11 @@
  */
 
 atomic_t irq_err_count;
-
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
 
 /*
  * Generic, controller-independent functions:
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
@@ -28,6 +28,9 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/apicdef.h>
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
 
 /*
  *     Smarter SMP flushing macros. 
@@ -103,6 +106,7 @@ static inline void leave_mm (unsigned lo
  * 2) Leave the mm if we are in the lazy tlb mode.
  */
 
+#if 0 /* Xen */
 asmlinkage void smp_invalidate_interrupt (void)
 {
        unsigned long cpu;
@@ -251,6 +255,20 @@ void flush_tlb_all(void)
        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
 }
 
+#else
+asmlinkage void smp_invalidate_interrupt (void)
+{ return; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(¤t->mm->cpu_vm_mask); }
+void flush_tlb_mm (struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+#endif /* Xen */
+
+
 void smp_kdb_stop(void)
 {
        send_IPI_allbutself(KDB_VECTOR);
@@ -310,13 +328,13 @@ static void __smp_call_function (void (*
 
        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
-               cpu_relax();
+               barrier();
 
        if (!wait)
                return;
 
        while (atomic_read(&data.finished) != cpus)
-               cpu_relax();
+               barrier();
 }
 
 /*
@@ -350,7 +368,11 @@ void smp_stop_cpu(void)
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        local_irq_disable();
+#ifdef CONFIG_XEN
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        local_irq_enable(); 
 }
 
@@ -364,8 +386,10 @@ static void smp_really_stop_cpu(void *du
 void smp_send_stop(void)
 {
        int nolock = 0;
+#ifndef CONFIG_XEN
        if (reboot_force)
                return;
+#endif
        /* Don't deadlock on the call lock in panic */
        if (!spin_trylock(&call_lock)) {
                /* ignore locking because we have paniced anyways */
@@ -376,7 +400,11 @@ void smp_send_stop(void)
                spin_unlock(&call_lock);
 
        local_irq_disable();
+#ifdef CONFIG_XEN
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        local_irq_enable();
 }
 
@@ -385,18 +413,17 @@ void smp_send_stop(void)
  * all the work is done automatically when
  * we return from the interrupt.
  */
-asmlinkage void smp_reschedule_interrupt(void)
+asmlinkage irqreturn_t smp_reschedule_interrupt(void)
 {
-       ack_APIC_irq();
+       return IRQ_HANDLED;
 }
 
-asmlinkage void smp_call_function_interrupt(void)
+asmlinkage irqreturn_t smp_call_function_interrupt(void)
 {
        void (*func) (void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;
 
-       ack_APIC_irq();
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
@@ -413,6 +440,8 @@ asmlinkage void smp_call_function_interr
                mb();
                atomic_inc(&call_data->finished);
        }
+       
+       return IRQ_HANDLED;
 }
 
 int safe_smp_processor_id(void)
@@ -422,7 +451,10 @@ int safe_smp_processor_id(void)
        if (disable_apic)
                return 0;
 
-       apicid = hard_smp_processor_id();
+#ifdef CONFIG_XEN
+       return smp_processor_id();
+#endif
+
        if (x86_cpu_to_apicid[apicid] == apicid)
                return apicid;
 
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
@@ -953,6 +953,17 @@ void __init trap_init(void)
        cpu_init();
 }
 
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+       trap_info_t *t = trap_table;
+
+       for (t = trap_table; t->address; t++) {
+               trap_ctxt[t->vector].flags = t->flags;
+               trap_ctxt[t->vector].cs = t->cs;
+               trap_ctxt[t->vector].address = t->address;
+       }
+}
+
 
 /* Actual parsing is done early in setup.c. */
 static int __init oops_dummy(char *s)
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
@@ -47,6 +47,7 @@
 #include <linux/bootmem.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
+#include <linux/interrupt.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -57,6 +58,8 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm/nmi.h>
+#include <asm/mpspec.h>
+#include <asm/arch_hooks.h>
 
 /* Change for real CPU hotplug. Note other files need to be fixed
    first too. */
@@ -96,6 +99,7 @@ cpumask_t cpu_sibling_map[NR_CPUS] __cac
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 
+#ifndef CONFIG_XEN
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -115,6 +119,7 @@ static unsigned long __cpuinit setup_tra
        memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(tramp);
 }
+#endif
 
 /*
  * The bootstrap kernel entry code has set these up. Save them for
@@ -130,6 +135,7 @@ static void __cpuinit smp_store_cpu_info
        print_cpu_info(c);
 }
 
+#ifndef CONFIG_XEN
 /*
  * New Funky TSC sync algorithm borrowed from IA64.
  * Main advantage is that it doesn't reset the TSCs fully and
@@ -331,6 +337,7 @@ static __init int notscsync_setup(char *
        return 0;
 }
 __setup("notscsync", notscsync_setup);
+#endif
 
 static atomic_t init_deasserted __cpuinitdata;
 
@@ -343,6 +350,7 @@ void __cpuinit smp_callin(void)
        int cpuid, phys_id;
        unsigned long timeout;
 
+#ifndef CONFIG_XEN
        /*
         * If waken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
@@ -352,10 +360,11 @@ void __cpuinit smp_callin(void)
        while (!atomic_read(&init_deasserted))
                cpu_relax();
 
+#endif
        /*
         * (This works even if the APIC is not enabled.)
         */
-       phys_id = GET_APIC_ID(apic_read(APIC_ID));
+       phys_id = smp_processor_id();
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
@@ -389,6 +398,7 @@ void __cpuinit smp_callin(void)
                        cpuid);
        }
 
+#ifndef CONFIG_XEN
        /*
         * the boot CPU has finished the init stage and is spinning
         * on callin_map until we finish. We are free to set up this
@@ -398,6 +408,7 @@ void __cpuinit smp_callin(void)
 
        Dprintk("CALLIN, before setup_local_APIC().\n");
        setup_local_APIC();
+#endif
 
        /*
         * Get our bogomips.
@@ -405,7 +416,9 @@ void __cpuinit smp_callin(void)
        calibrate_delay();
        Dprintk("Stack at about %p\n",&cpuid);
 
+#ifndef CONFIG_XEN
        disable_APIC_timer();
+#endif
 
        /*
         * Save our processor parameters
@@ -418,6 +431,26 @@ void __cpuinit smp_callin(void)
        cpu_set(cpuid, cpu_callin_map);
 }
 
+static irqreturn_t ldebug_interrupt(int irq, void *dev_id, struct pt_regs 
*regs)
+{
+       return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+       sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+       BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+                          SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+extern void local_setup_timer(void);
+
 /*
  * Setup code on secondary processor (after comming out of the trampoline)
  */
@@ -434,6 +467,7 @@ void __cpuinit start_secondary(void)
        /* otherwise gcc will move up the smp_processor_id before the cpu_init 
*/
        barrier();
 
+#ifndef CONFIG_XEN
        Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());         
        setup_secondary_APIC_clock();
 
@@ -446,6 +480,12 @@ void __cpuinit start_secondary(void)
        }
 
        enable_APIC_timer();
+#else
+       local_setup_timer();
+       ldebug_setup();
+       smp_intr_init();
+       local_irq_enable();
+#endif
 
        /*
         * Allow the master to continue.
@@ -453,10 +493,12 @@ void __cpuinit start_secondary(void)
        cpu_set(smp_processor_id(), cpu_online_map);
        mb();
 
+#ifndef CONFIG_XEN
        /* Wait for TSC sync to not schedule things before.
           We still process interrupts, which could see an inconsistent
           time in that window unfortunately. */
        tsc_sync_wait();
+#endif
 
        cpu_idle();
 }
@@ -502,6 +544,7 @@ static void inquire_remote_apic(int apic
 }
 #endif
 
+#ifndef CONFIG_XEN
 /*
  * Kick the secondary to wake up.
  */
@@ -627,6 +670,7 @@ static int __cpuinit wakeup_secondary_vi
 
        return (send_status | accept_status);
 }
+#endif
 
 /*
  * Boot one CPU.
@@ -637,6 +681,14 @@ static int __cpuinit do_boot_cpu(int cpu
        unsigned long boot_error;
        int timeout;
        unsigned long start_rip;
+#ifdef CONFIG_XEN
+       vcpu_guest_context_t ctxt;
+       extern void startup_64_smp(void);
+       extern void hypervisor_callback(void);
+       extern void failsafe_callback(void);
+       extern void smp_trap_init(trap_info_t *);
+       int i;
+#endif
        /*
         * We can't use kernel_thread since we must avoid to
         * reschedule the child.
@@ -649,7 +701,7 @@ static int __cpuinit do_boot_cpu(int cpu
 
        cpu_pda[cpu].pcurrent = idle;
 
-       start_rip = setup_trampoline();
+       start_rip = (unsigned long)startup_64_smp;
 
        init_rsp = idle->thread.rsp;
        per_cpu(init_tss,cpu).rsp0 = init_rsp;
@@ -666,6 +718,95 @@ static int __cpuinit do_boot_cpu(int cpu
 
        atomic_set(&init_deasserted, 0);
 
+#ifdef CONFIG_XEN
+       if (cpu_gdt_descr[0].size > PAGE_SIZE)
+               BUG();
+       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+       memcpy((void *)cpu_gdt_descr[cpu].address,
+               (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.flags = VGCF_IN_KERNEL;
+       ctxt.user_regs.ds = __USER_DS;
+       ctxt.user_regs.es = __USER_DS;
+       ctxt.user_regs.fs = 0;
+       ctxt.user_regs.gs = 0;
+       ctxt.user_regs.ss = __KERNEL_DS|0x3;
+       ctxt.user_regs.cs = __KERNEL_CS|0x3;
+       ctxt.user_regs.rip = start_rip;
+       ctxt.user_regs.rsp = idle->thread.rsp;
+       ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+       /* FPU is set up to default initial state. */
+       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       smp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       {
+               unsigned long va;
+               int f;
+
+               for (va = cpu_gdt_descr[cpu].address, f = 0;
+                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+                    va += PAGE_SIZE, f++) {
+                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       make_page_readonly((void *)va);
+               }
+               ctxt.gdt_ents = GDT_ENTRIES;
+       }
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss = __KERNEL_DS;
+       ctxt.kernel_sp = idle->thread.rsp;
+
+       /* Callback handlers. */
+       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+       ctxt.syscall_callback_eip  = (unsigned long)system_call;
+
+       ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt);
+
+       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+#else
        Dprintk("Setting warm reset code and vector.\n");
 
        CMOS_WRITE(0xa, 0xf);
@@ -729,6 +870,7 @@ static int __cpuinit do_boot_cpu(int cpu
 #endif
                }
        }
+#endif
        if (boot_error) {
                cpu_clear(cpu, cpu_callout_map); /* was set here 
(do_boot_cpu()) */
                clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
@@ -857,11 +999,13 @@ static __cpuinit void enforce_max_cpus(u
  */
 static int __cpuinit smp_sanity_check(unsigned max_cpus)
 {
+#ifndef CONFIG_XEN
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
                       hard_smp_processor_id());
                physid_set(hard_smp_processor_id(), phys_cpu_present_map);
        }
+#endif
 
        /*
         * If we couldn't find an SMP configuration at boot time,
@@ -876,6 +1020,7 @@ static int __cpuinit smp_sanity_check(un
                return -1;
        }
 
+#ifndef CONFIG_XEN
        /*
         * Should not be necessary because the MP table should list the boot
         * CPU too, but we do it for the sake of robustness anyway.
@@ -896,16 +1041,17 @@ static int __cpuinit smp_sanity_check(un
                nr_ioapics = 0;
                return -1;
        }
+#endif
 
        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
+               HYPERVISOR_shared_info->n_vcpu = 1;
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy 
APIC emulation.\n");
                nr_ioapics = 0;
                return -1;
        }
-
        return 0;
 }
 
@@ -928,7 +1074,7 @@ void __cpuinit smp_prepare_cpus(unsigned
         */
        for (i = 0; i < NR_CPUS; i++) {
                int apicid = cpu_present_to_apicid(i);
-               if (physid_isset(apicid, phys_cpu_present_map)) {
+               if (i < HYPERVISOR_shared_info->n_vcpu) {
                        cpu_set(i, cpu_present_map);
                        /* possible map would be different if we supported real
                           CPU hotplug. */
@@ -942,7 +1088,9 @@ void __cpuinit smp_prepare_cpus(unsigned
                return;
        }
 
+       smp_intr_init();
 
+#ifndef CONFIG_XEN
        /*
         * Switch from PIC to APIC mode.
         */
@@ -954,6 +1102,7 @@ void __cpuinit smp_prepare_cpus(unsigned
                      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
                /* Or can we switch back to PIC here? */
        }
+#endif
 
        /*
         * Now start the IO-APICs
@@ -967,7 +1116,9 @@ void __cpuinit smp_prepare_cpus(unsigned
         * Set up local APIC timer on boot CPU.
         */
 
+#ifndef CONFIG_XEN
        setup_boot_APIC_clock();
+#endif
 }
 
 /*
@@ -1021,6 +1172,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
  */
 void __cpuinit smp_cpus_done(unsigned int max_cpus)
 {
+#ifndef CONFIG_XEN
        zap_low_mappings();
        smp_cleanup_boot();
 
@@ -1028,8 +1180,60 @@ void __cpuinit smp_cpus_done(unsigned in
        setup_ioapic_dest();
 #endif
 
-       detect_siblings();
        time_init_gtod();
 
        check_nmi_watchdog();
+#endif
+       detect_siblings();
+}
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+void smp_intr_init(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(resched_irq, cpu) =
+               bind_ipi_to_irq(RESCHEDULE_VECTOR);
+       sprintf(resched_name[cpu], "resched%d", cpu);
+       BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+                          SA_INTERRUPT, resched_name[cpu], NULL));
+
+       per_cpu(callfunc_irq, cpu) =
+               bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
+       sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+       BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+                          smp_call_function_interrupt,
+                          SA_INTERRUPT, callfunc_name[cpu], NULL));
+}
+
+static void smp_intr_exit(void)
+{
+       int cpu = smp_processor_id();
+
+       free_irq(per_cpu(resched_irq, cpu), NULL);
+       unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+       free_irq(per_cpu(callfunc_irq, cpu), NULL);
+       unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+void smp_suspend(void)
+{
+       /* XXX todo: take down time and ipi's on all cpus */
+       local_teardown_timer_irq();
+       smp_intr_exit();
+}
+
+void smp_resume(void)
+{
+       /* XXX todo: restore time and ipi's on all cpus */
+       smp_intr_init();
+       local_setup_timer_irq();
 }
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
@@ -105,7 +105,6 @@ static int __init mpf_checksum(unsigned 
        return sum & 0xFF;
 }
 
-#ifndef CONFIG_XEN
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
        int ver;
@@ -162,12 +161,6 @@ static void __init MP_processor_info (st
                x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
        }
 }
-#else
-void __init MP_processor_info (struct mpc_config_processor *m)
-{
-       num_processors++;
-}
-#endif /* CONFIG_XEN */
 
 static void __init MP_bus_info (struct mpc_config_bus *m)
 {
@@ -702,7 +695,6 @@ void __init mp_register_lapic (
        if (id == boot_cpu_physical_apicid)
                boot_cpu = 1;
 
-#ifndef CONFIG_XEN
        processor.mpc_type = MP_PROCESSOR;
        processor.mpc_apicid = id;
        processor.mpc_apicver = 0x10; /* TBD: lapic version */
@@ -713,7 +705,6 @@ void __init mp_register_lapic (
        processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
        processor.mpc_reserved[0] = 0;
        processor.mpc_reserved[1] = 0;
-#endif
 
        MP_processor_info(&processor);
 }
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
===================================================================
--- /dev/null
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+
+#if defined(CONFIG_ACPI_BUS)
+#include <acpi/acpi_bus.h>
+#endif
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+extern struct genapic apic_cluster;
+extern struct genapic apic_flat;
+extern struct genapic apic_xen;
+
+struct genapic *genapic = &apic_xen;
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init clustered_apic_check(void)
+{
+       long i;
+       u8 clusters, max_cluster;
+       u8 id;
+       u8 cluster_cnt[NUM_APIC_CLUSTERS];
+
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+               /* AMD always uses flat mode right now */
+               genapic = &apic_flat;
+               goto print;
+       }
+
+#if defined(CONFIG_ACPI_BUS)
+       /*
+        * Some x86_64 machines use physical APIC mode regardless of how many
+        * procs/clusters are present (x86_64 ES7000 is an example).
+        */
+       if (acpi_fadt.revision > FADT2_REVISION_ID)
+               if (acpi_fadt.force_apic_physical_destination_mode) {
+                       genapic = &apic_cluster;
+                       goto print;
+               }
+#endif
+
+       memset(cluster_cnt, 0, sizeof(cluster_cnt));
+
+       for (i = 0; i < NR_CPUS; i++) {
+               id = bios_cpu_apicid[i];
+               if (id != BAD_APICID)
+                       cluster_cnt[APIC_CLUSTERID(id)]++;
+       }
+
+       clusters = 0;
+       max_cluster = 0;
+       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+               if (cluster_cnt[i] > 0) {
+                       ++clusters;
+                       if (cluster_cnt[i] > max_cluster)
+                               max_cluster = cluster_cnt[i];
+               }
+       }
+
+       /*
+        * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+        * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+        * else physical mode.
+        * (We don't use lowest priority delivery + HW APIC IRQ steering, so
+        * can ignore the clustered logical case and go straight to physical.)
+        */
+       if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+               genapic = &apic_flat;
+       else
+               genapic = &apic_cluster;
+
+print:
+       /* hardcode to xen apic functions */
+       genapic = &apic_xen;
+       printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+}
+
+/* Same for both flat and clustered. */
+
+extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned 
int dest);
+
+void send_IPI_self(int vector)
+{
+       xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
===================================================================
--- /dev/null
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Xen APIC subarch code.  Maximum 8 CPUs, logical delivery.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ *
+ * Hacked to pieces for Xen by Chris Wright.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm-xen/evtchn.h>
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+       unsigned int evtchn;
+       Dprintk("%s\n", __FUNCTION__);
+
+       evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+       if (evtchn)
+               notify_via_evtchn(evtchn);
+       else
+               printk("send_IPI to unbound port %d/%d", cpu, vector);
+
+}
+
+void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int 
dest)
+{
+       int cpu;
+
+       switch (shortcut) {
+       case APIC_DEST_SELF:
+               __send_IPI_one(smp_processor_id(), vector);
+               break;
+       case APIC_DEST_ALLBUT:
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu == smp_processor_id())
+                               continue;
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       case APIC_DEST_ALLINC:
+               for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+                       if (cpu_isset(cpu, cpu_online_map)) {
+                               __send_IPI_one(cpu, vector);
+                       }
+               }
+               break;
+       default:
+               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+                      vector);
+               break;
+       }
+
+}
+
+static cpumask_t xen_target_cpus(void)
+{
+       return cpu_online_map;
+}
+
+/*
+ * Set up the logical destination ID.
+ * Do nothing, not called now.
+ */
+static void xen_init_apic_ldr(void)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return;
+}
+
+static void xen_send_IPI_allbutself(int vector)
+{
+       /*
+        * if there are no other CPUs in the system then
+        * we get an APIC send error if we try to broadcast.
+        * thus we have to avoid sending IPIs in this case.
+        */
+       Dprintk("%s\n", __FUNCTION__);
+       if (num_online_cpus() > 1)
+               xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, 
APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_all(int vector)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+       unsigned long mask = cpus_addr(cpumask)[0];
+       unsigned int cpu;
+       unsigned long flags;
+
+       Dprintk("%s\n", __FUNCTION__);
+       local_irq_save(flags);
+       WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+
+       for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+               if (cpu_isset(cpu, cpumask)) {
+                       __send_IPI_one(cpu, vector);
+               }
+       }
+       local_irq_restore(flags);
+}
+
+static int xen_apic_id_registered(void)
+{
+       /* better be set */
+       Dprintk("%s\n", __FUNCTION__);
+       return physid_isset(smp_processor_id(), phys_cpu_present_map);
+}
+
+static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+       u32 ebx;
+
+       Dprintk("%s\n", __FUNCTION__);
+       ebx = cpuid_ebx(1);
+       return ((ebx >> 24) & 0xFF) >> index_msb;
+}
+
+struct genapic apic_xen =  {
+       .name = "xen",
+       .int_delivery_mode = dest_LowestPrio,
+       .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+       .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+       .target_cpus = xen_target_cpus,
+       .apic_id_registered = xen_apic_id_registered,
+       .init_apic_ldr = xen_init_apic_ldr,
+       .send_IPI_all = xen_send_IPI_all,
+       .send_IPI_allbutself = xen_send_IPI_allbutself,
+       .send_IPI_mask = xen_send_IPI_mask,
+       .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+};
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
 |