WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

Re: [Xen-devel] Genapic in 32-bit Dom0 (Was : More Problems booting32-bit Domain 0 on ES7000 x86_64)

To: "Puthiyaparambil, Aravindh" <aravindh.puthiyaparambil@xxxxxxxxxx>
Subject: Re: [Xen-devel] Genapic in 32-bit Dom0 (Was : More Problems booting32-bit Domain 0 on ES7000 x86_64)
From: Chris Wright <chrisw@xxxxxxxx>
Date: Mon, 8 Aug 2005 00:23:15 -0700
Cc: "Magolan, John F" <John.Magolan@xxxxxxxxxx>, xin.b.li@xxxxxxxxx, Chris Wright <chrisw@xxxxxxxx>, xen-devel@xxxxxxxxxxxxxxxxxxx, "Subrahmanian, Raj" <raj.subrahmanian@xxxxxxxxxx>, "Vessey, Bruce A" <Bruce.Vessey@xxxxxxxxxx>
Delivery-date: Mon, 08 Aug 2005 07:21:48 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
In-reply-to: <EF8D308BE33AF54D8934DF26520252D3025E7A8F@xxxxxxxxxxxxxxxxxxxxxxxxxxxx>
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
References: <EF8D308BE33AF54D8934DF26520252D3025E7A8F@xxxxxxxxxxxxxxxxxxxxxxxxxxxx>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Mutt/1.5.6i
* Puthiyaparambil, Aravindh (aravindh.puthiyaparambil@xxxxxxxxxx) wrote:
> Is there any way for us to see an early copy of this patch? I think we
> need to come up with a way for apic_xen to accommodate clustered apics too
> or there might be issues running on large systems.

Here's an update from the last copy I made (about one week old I think).
Many thanks to Xin who found my last bug so that 64-bit syscalls actually
worked!  I'd run this one quite successfully on dom0 before refreshing to
a newer Xen snapshot; this actual patch is no more than compile-tested.
I believe Xin is continuing on to work on domU (which coincidentally
has apic compilation issues), and may have a more up-to-date patch.

thanks,
-chris

Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
@@ -43,7 +43,6 @@
 startup_64:
 ENTRY(_start)
         cld                
-       movq init_rsp(%rip),%rsp
        /* Copy the necessary stuff from xen_start_info structure. */
        movq  $xen_start_info_union,%rdi
        movq  $64,%rcx          /* sizeof (union xen_start_info_union) / sizeof 
(long) */
@@ -54,6 +53,7 @@ ENTRY(_start)
        cld
 #endif /* CONFIG_SMP */
 
+       movq init_rsp(%rip),%rsp
        /* zero EFLAGS after setting rsp */
        pushq $0
        popfq
@@ -140,6 +140,7 @@ ENTRY(cpu_gdt_table)
        .quad   0,0                     /* TSS */
        .quad   0,0                     /* LDT */
        .quad   0,0,0                   /* three TLS descriptors */ 
+       .quad   0                       /* unused now?   __KERNEL16_CS - 16bit 
PM for S3 wakeup. */
 
 gdt_end:       
        /* asm/segment.h:GDT_ENTRIES must match this */ 
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
@@ -25,10 +25,10 @@ obj-$(CONFIG_ACPI_BOOT)             += acpi/
 c-obj-$(CONFIG_X86_MSR)                += msr.o
 obj-$(CONFIG_MICROCODE)                += microcode.o
 obj-$(CONFIG_X86_CPUID)                += cpuid.o
-#obj-$(CONFIG_SMP)             += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP)              += smp.o smpboot.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o
 c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
-obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o
+obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o genapic_xen.o
 c-obj-$(CONFIG_X86_IO_APIC)    += genapic.o genapic_cluster.o genapic_flat.o
 #obj-$(CONFIG_PM)              += suspend.o
 #obj-$(CONFIG_SOFTWARE_SUSPEND)        += suspend_asm.o
@@ -54,7 +54,7 @@ intel_cacheinfo-y             += ../../../i386/kern
 quirks-y                       += ../../i386/kernel/quirks.o
 
 c-link := init_task.o
-s-link := vsyscall.o 
+s-link := vsyscall.o
 
 $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst 
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
        @ln -fsn $(srctree)/arch/x86_64/kernel/$(notdir $@) $@
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
@@ -259,13 +259,13 @@ void __init cpu_init (void)
         * Initialize the per-CPU GDT with the boot GDT,
         * and set up the GDT descriptor:
         */
+#ifndef CONFIG_XEN
        if (cpu) {
                memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
        }       
 
        cpu_gdt_descr[cpu].size = GDT_SIZE;
        cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-#if 0
        asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
        asm volatile("lidt %0" :: "m" (idt_descr));
 #endif
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
@@ -599,6 +599,17 @@ static void __init print_memory_map(char
         }
 }
 
+void __init smp_alloc_memory(void)
+{
+       int cpu;
+
+       for (cpu = 1; cpu < NR_CPUS; cpu++) {
+               cpu_gdt_descr[cpu].address = (unsigned long)
+                       alloc_bootmem_low_pages(PAGE_SIZE);
+               /* XXX free unused pages later */
+       }
+}
+
 void __init setup_arch(char **cmdline_p)
 {
        int i, j;
@@ -738,6 +749,9 @@ void __init setup_arch(char **cmdline_p)
                }
        }
 #endif
+#ifdef CONFIG_SMP
+       smp_alloc_memory();
+#endif
        paging_init();
 #ifdef CONFIG_X86_LOCAL_APIC
        /*
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
@@ -8,11 +8,14 @@
 #define sizeof_vcpu_shift              3
 
 #ifdef CONFIG_SMP
-#define preempt_disable(reg)   incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg)    decl threadinfo_preempt_count(reg)
+//#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
+//#define preempt_enable(reg)  decl threadinfo_preempt_count(reg)
+#define preempt_disable(reg)
+#define preempt_enable(reg)
 #define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp)                   ; \
                                movq %gs:pda_cpunumber,reg              ; \
-                               shl  $sizeof_vcpu_shift,reg             ; \
+                               shl  $32, reg                           ; \
+                               shr  $32-sizeof_vcpu_shift,reg          ; \
                                addq HYPERVISOR_shared_info,reg
 #define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp)                    ; \
 #define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
@@ -20,7 +20,11 @@
  */
 
 atomic_t irq_err_count;
-
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
 
 /*
  * Generic, controller-independent functions:
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
@@ -28,6 +28,9 @@
 #include <asm/mmu_context.h>
 #include <asm/proto.h>
 #include <asm/apicdef.h>
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
 
 /*
  *     Smarter SMP flushing macros. 
@@ -103,6 +106,7 @@ static inline void leave_mm (unsigned lo
  * 2) Leave the mm if we are in the lazy tlb mode.
  */
 
+#if 0 /* Xen */
 asmlinkage void smp_invalidate_interrupt (void)
 {
        unsigned long cpu;
@@ -251,6 +255,20 @@ void flush_tlb_all(void)
        on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
 }
 
+#else
+asmlinkage void smp_invalidate_interrupt (void)
+{ return; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(&current->mm->cpu_vm_mask); }
+void flush_tlb_mm (struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+#endif /* Xen */
+
+
 void smp_kdb_stop(void)
 {
        send_IPI_allbutself(KDB_VECTOR);
@@ -310,13 +328,13 @@ static void __smp_call_function (void (*
 
        /* Wait for response */
        while (atomic_read(&data.started) != cpus)
-               cpu_relax();
+               barrier();
 
        if (!wait)
                return;
 
        while (atomic_read(&data.finished) != cpus)
-               cpu_relax();
+               barrier();
 }
 
 /*
@@ -350,7 +368,11 @@ void smp_stop_cpu(void)
         */
        cpu_clear(smp_processor_id(), cpu_online_map);
        local_irq_disable();
+#ifdef CONFIG_XEN
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        local_irq_enable(); 
 }
 
@@ -364,8 +386,10 @@ static void smp_really_stop_cpu(void *du
 void smp_send_stop(void)
 {
        int nolock = 0;
+#ifndef CONFIG_XEN
        if (reboot_force)
                return;
+#endif
        /* Don't deadlock on the call lock in panic */
        if (!spin_trylock(&call_lock)) {
                /* ignore locking because we have paniced anyways */
@@ -376,7 +400,11 @@ void smp_send_stop(void)
                spin_unlock(&call_lock);
 
        local_irq_disable();
+#ifdef CONFIG_XEN
+       xxprint("stop_this_cpu disable_local_APIC\n");
+#else
        disable_local_APIC();
+#endif
        local_irq_enable();
 }
 
@@ -385,18 +413,17 @@ void smp_send_stop(void)
  * all the work is done automatically when
  * we return from the interrupt.
  */
-asmlinkage void smp_reschedule_interrupt(void)
+asmlinkage irqreturn_t smp_reschedule_interrupt(void)
 {
-       ack_APIC_irq();
+       return IRQ_HANDLED;
 }
 
-asmlinkage void smp_call_function_interrupt(void)
+asmlinkage irqreturn_t smp_call_function_interrupt(void)
 {
        void (*func) (void *info) = call_data->func;
        void *info = call_data->info;
        int wait = call_data->wait;
 
-       ack_APIC_irq();
        /*
         * Notify initiating CPU that I've grabbed the data and am
         * about to execute the function
@@ -413,6 +440,8 @@ asmlinkage void smp_call_function_interr
                mb();
                atomic_inc(&call_data->finished);
        }
+       
+       return IRQ_HANDLED;
 }
 
 int safe_smp_processor_id(void)
@@ -422,7 +451,10 @@ int safe_smp_processor_id(void)
        if (disable_apic)
                return 0;
 
-       apicid = hard_smp_processor_id();
+#ifdef CONFIG_XEN
+       return smp_processor_id();
+#endif
+
        if (x86_cpu_to_apicid[apicid] == apicid)
                return apicid;
 
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
@@ -953,6 +953,17 @@ void __init trap_init(void)
        cpu_init();
 }
 
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+       trap_info_t *t = trap_table;
+
+       for (t = trap_table; t->address; t++) {
+               trap_ctxt[t->vector].flags = t->flags;
+               trap_ctxt[t->vector].cs = t->cs;
+               trap_ctxt[t->vector].address = t->address;
+       }
+}
+
 
 /* Actual parsing is done early in setup.c. */
 static int __init oops_dummy(char *s)
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
@@ -47,6 +47,7 @@
 #include <linux/bootmem.h>
 #include <linux/thread_info.h>
 #include <linux/module.h>
+#include <linux/interrupt.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -57,6 +58,8 @@
 #include <asm/tlbflush.h>
 #include <asm/proto.h>
 #include <asm/nmi.h>
+#include <asm/mpspec.h>
+#include <asm/arch_hooks.h>
 
 /* Change for real CPU hotplug. Note other files need to be fixed
    first too. */
@@ -96,6 +99,7 @@ cpumask_t cpu_sibling_map[NR_CPUS] __cac
 cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_core_map);
 
+#ifndef CONFIG_XEN
 /*
  * Trampoline 80x86 program as an array.
  */
@@ -115,6 +119,7 @@ static unsigned long __cpuinit setup_tra
        memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
        return virt_to_phys(tramp);
 }
+#endif
 
 /*
  * The bootstrap kernel entry code has set these up. Save them for
@@ -130,6 +135,7 @@ static void __cpuinit smp_store_cpu_info
        print_cpu_info(c);
 }
 
+#ifndef CONFIG_XEN
 /*
  * New Funky TSC sync algorithm borrowed from IA64.
  * Main advantage is that it doesn't reset the TSCs fully and
@@ -331,6 +337,7 @@ static __init int notscsync_setup(char *
        return 0;
 }
 __setup("notscsync", notscsync_setup);
+#endif
 
 static atomic_t init_deasserted __cpuinitdata;
 
@@ -343,6 +350,7 @@ void __cpuinit smp_callin(void)
        int cpuid, phys_id;
        unsigned long timeout;
 
+#ifndef CONFIG_XEN
        /*
         * If waken up by an INIT in an 82489DX configuration
         * we may get here before an INIT-deassert IPI reaches
@@ -352,10 +360,11 @@ void __cpuinit smp_callin(void)
        while (!atomic_read(&init_deasserted))
                cpu_relax();
 
+#endif
        /*
         * (This works even if the APIC is not enabled.)
         */
-       phys_id = GET_APIC_ID(apic_read(APIC_ID));
+       phys_id = smp_processor_id();
        cpuid = smp_processor_id();
        if (cpu_isset(cpuid, cpu_callin_map)) {
                panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
@@ -389,6 +398,7 @@ void __cpuinit smp_callin(void)
                        cpuid);
        }
 
+#ifndef CONFIG_XEN
        /*
         * the boot CPU has finished the init stage and is spinning
         * on callin_map until we finish. We are free to set up this
@@ -398,6 +408,7 @@ void __cpuinit smp_callin(void)
 
        Dprintk("CALLIN, before setup_local_APIC().\n");
        setup_local_APIC();
+#endif
 
        /*
         * Get our bogomips.
@@ -405,7 +416,9 @@ void __cpuinit smp_callin(void)
        calibrate_delay();
        Dprintk("Stack at about %p\n",&cpuid);
 
+#ifndef CONFIG_XEN
        disable_APIC_timer();
+#endif
 
        /*
         * Save our processor parameters
@@ -418,6 +431,26 @@ void __cpuinit smp_callin(void)
        cpu_set(cpuid, cpu_callin_map);
 }
 
+static irqreturn_t ldebug_interrupt(int irq, void *dev_id, struct pt_regs 
*regs)
+{
+       return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+       sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+       BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+                          SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+extern void local_setup_timer(void);
+
 /*
  * Setup code on secondary processor (after comming out of the trampoline)
  */
@@ -434,6 +467,7 @@ void __cpuinit start_secondary(void)
        /* otherwise gcc will move up the smp_processor_id before the cpu_init 
*/
        barrier();
 
+#ifndef CONFIG_XEN
        Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());         
        setup_secondary_APIC_clock();
 
@@ -446,6 +480,12 @@ void __cpuinit start_secondary(void)
        }
 
        enable_APIC_timer();
+#else
+       local_setup_timer();
+       ldebug_setup();
+       smp_intr_init();
+       local_irq_enable();
+#endif
 
        /*
         * Allow the master to continue.
@@ -453,10 +493,12 @@ void __cpuinit start_secondary(void)
        cpu_set(smp_processor_id(), cpu_online_map);
        mb();
 
+#ifndef CONFIG_XEN
        /* Wait for TSC sync to not schedule things before.
           We still process interrupts, which could see an inconsistent
           time in that window unfortunately. */
        tsc_sync_wait();
+#endif
 
        cpu_idle();
 }
@@ -502,6 +544,7 @@ static void inquire_remote_apic(int apic
 }
 #endif
 
+#ifndef CONFIG_XEN
 /*
  * Kick the secondary to wake up.
  */
@@ -627,6 +670,7 @@ static int __cpuinit wakeup_secondary_vi
 
        return (send_status | accept_status);
 }
+#endif
 
 /*
  * Boot one CPU.
@@ -637,6 +681,14 @@ static int __cpuinit do_boot_cpu(int cpu
        unsigned long boot_error;
        int timeout;
        unsigned long start_rip;
+#ifdef CONFIG_XEN
+       vcpu_guest_context_t ctxt;
+       extern void startup_64_smp(void);
+       extern void hypervisor_callback(void);
+       extern void failsafe_callback(void);
+       extern void smp_trap_init(trap_info_t *);
+       int i;
+#endif
        /*
         * We can't use kernel_thread since we must avoid to
         * reschedule the child.
@@ -649,7 +701,7 @@ static int __cpuinit do_boot_cpu(int cpu
 
        cpu_pda[cpu].pcurrent = idle;
 
-       start_rip = setup_trampoline();
+       start_rip = (unsigned long)startup_64_smp;
 
        init_rsp = idle->thread.rsp;
        per_cpu(init_tss,cpu).rsp0 = init_rsp;
@@ -666,6 +718,95 @@ static int __cpuinit do_boot_cpu(int cpu
 
        atomic_set(&init_deasserted, 0);
 
+#ifdef CONFIG_XEN
+       if (cpu_gdt_descr[0].size > PAGE_SIZE)
+               BUG();
+       cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+       memcpy((void *)cpu_gdt_descr[cpu].address,
+               (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+       memset(&ctxt, 0, sizeof(ctxt));
+
+       ctxt.flags = VGCF_IN_KERNEL;
+       ctxt.user_regs.ds = __USER_DS;
+       ctxt.user_regs.es = __USER_DS;
+       ctxt.user_regs.fs = 0;
+       ctxt.user_regs.gs = 0;
+       ctxt.user_regs.ss = __KERNEL_DS|0x3;
+       ctxt.user_regs.cs = __KERNEL_CS|0x3;
+       ctxt.user_regs.rip = start_rip;
+       ctxt.user_regs.rsp = idle->thread.rsp;
+       ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+       /* FPU is set up to default initial state. */
+       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+       /* Virtual IDT is empty at start-of-day. */
+       for ( i = 0; i < 256; i++ )
+       {
+               ctxt.trap_ctxt[i].vector = i;
+               ctxt.trap_ctxt[i].cs     = FLAT_KERNEL_CS;
+       }
+       smp_trap_init(ctxt.trap_ctxt);
+
+       /* No LDT. */
+       ctxt.ldt_ents = 0;
+
+       {
+               unsigned long va;
+               int f;
+
+               for (va = cpu_gdt_descr[cpu].address, f = 0;
+                    va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+                    va += PAGE_SIZE, f++) {
+                       ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+                       make_page_readonly((void *)va);
+               }
+               ctxt.gdt_ents = GDT_ENTRIES;
+       }
+
+       /* Ring 1 stack is the initial stack. */
+       ctxt.kernel_ss = __KERNEL_DS;
+       ctxt.kernel_sp = idle->thread.rsp;
+
+       /* Callback handlers. */
+       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+       ctxt.syscall_callback_eip  = (unsigned long)system_call;
+
+       ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt);
+
+       boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+       if (!boot_error) {
+               /*
+                * allow APs to start initializing.
+                */
+               Dprintk("Before Callout %d.\n", cpu);
+               cpu_set(cpu, cpu_callout_map);
+               Dprintk("After Callout %d.\n", cpu);
+
+               /*
+                * Wait 5s total for a response
+                */
+               for (timeout = 0; timeout < 50000; timeout++) {
+                       if (cpu_isset(cpu, cpu_callin_map))
+                               break;  /* It has booted */
+                       udelay(100);
+               }
+
+               if (cpu_isset(cpu, cpu_callin_map)) {
+                       /* number CPUs logically, starting from 1 (BSP is 0) */
+                       Dprintk("OK.\n");
+                       printk("CPU%d: ", cpu);
+                       print_cpu_info(&cpu_data[cpu]);
+                       Dprintk("CPU has booted.\n");
+               } else {
+                       boot_error= 1;
+               }
+       }
+       x86_cpu_to_apicid[cpu] = apicid;
+#else
        Dprintk("Setting warm reset code and vector.\n");
 
        CMOS_WRITE(0xa, 0xf);
@@ -729,6 +870,7 @@ static int __cpuinit do_boot_cpu(int cpu
 #endif
                }
        }
+#endif
        if (boot_error) {
                cpu_clear(cpu, cpu_callout_map); /* was set here 
(do_boot_cpu()) */
                clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
@@ -857,11 +999,13 @@ static __cpuinit void enforce_max_cpus(u
  */
 static int __cpuinit smp_sanity_check(unsigned max_cpus)
 {
+#ifndef CONFIG_XEN
        if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
                printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
                       hard_smp_processor_id());
                physid_set(hard_smp_processor_id(), phys_cpu_present_map);
        }
+#endif
 
        /*
         * If we couldn't find an SMP configuration at boot time,
@@ -876,6 +1020,7 @@ static int __cpuinit smp_sanity_check(un
                return -1;
        }
 
+#ifndef CONFIG_XEN
        /*
         * Should not be necessary because the MP table should list the boot
         * CPU too, but we do it for the sake of robustness anyway.
@@ -896,16 +1041,17 @@ static int __cpuinit smp_sanity_check(un
                nr_ioapics = 0;
                return -1;
        }
+#endif
 
        /*
         * If SMP should be disabled, then really disable it!
         */
        if (!max_cpus) {
+               HYPERVISOR_shared_info->n_vcpu = 1;
                printk(KERN_INFO "SMP mode deactivated, forcing use of dummy 
APIC emulation.\n");
                nr_ioapics = 0;
                return -1;
        }
-
        return 0;
 }
 
@@ -928,7 +1074,7 @@ void __cpuinit smp_prepare_cpus(unsigned
         */
        for (i = 0; i < NR_CPUS; i++) {
                int apicid = cpu_present_to_apicid(i);
-               if (physid_isset(apicid, phys_cpu_present_map)) {
+               if (i < HYPERVISOR_shared_info->n_vcpu) {
                        cpu_set(i, cpu_present_map);
                        /* possible map would be different if we supported real
                           CPU hotplug. */
@@ -942,7 +1088,9 @@ void __cpuinit smp_prepare_cpus(unsigned
                return;
        }
 
+       smp_intr_init();
 
+#ifndef CONFIG_XEN
        /*
         * Switch from PIC to APIC mode.
         */
@@ -954,6 +1102,7 @@ void __cpuinit smp_prepare_cpus(unsigned
                      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
                /* Or can we switch back to PIC here? */
        }
+#endif
 
        /*
         * Now start the IO-APICs
@@ -967,7 +1116,9 @@ void __cpuinit smp_prepare_cpus(unsigned
         * Set up local APIC timer on boot CPU.
         */
 
+#ifndef CONFIG_XEN
        setup_boot_APIC_clock();
+#endif
 }
 
 /*
@@ -1021,6 +1172,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
  */
 void __cpuinit smp_cpus_done(unsigned int max_cpus)
 {
+#ifndef CONFIG_XEN
        zap_low_mappings();
        smp_cleanup_boot();
 
@@ -1028,8 +1180,60 @@ void __cpuinit smp_cpus_done(unsigned in
        setup_ioapic_dest();
 #endif
 
-       detect_siblings();
        time_init_gtod();
 
        check_nmi_watchdog();
+#endif
+       detect_siblings();
+}
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+void smp_intr_init(void)
+{
+       int cpu = smp_processor_id();
+
+       per_cpu(resched_irq, cpu) =
+               bind_ipi_to_irq(RESCHEDULE_VECTOR);
+       sprintf(resched_name[cpu], "resched%d", cpu);
+       BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+                          SA_INTERRUPT, resched_name[cpu], NULL));
+
+       per_cpu(callfunc_irq, cpu) =
+               bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
+       sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+       BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+                          smp_call_function_interrupt,
+                          SA_INTERRUPT, callfunc_name[cpu], NULL));
+}
+
+static void smp_intr_exit(void)
+{
+       int cpu = smp_processor_id();
+
+       free_irq(per_cpu(resched_irq, cpu), NULL);
+       unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+       free_irq(per_cpu(callfunc_irq, cpu), NULL);
+       unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+void smp_suspend(void)
+{
+       /* XXX todo: take down time and ipi's on all cpus */
+       local_teardown_timer_irq();
+       smp_intr_exit();
+}
+
+void smp_resume(void)
+{
+       /* XXX todo: restore time and ipi's on all cpus */
+       smp_intr_init();
+       local_setup_timer_irq();
 }
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
@@ -105,7 +105,6 @@ static int __init mpf_checksum(unsigned 
        return sum & 0xFF;
 }
 
-#ifndef CONFIG_XEN
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
        int ver;
@@ -162,12 +161,6 @@ static void __init MP_processor_info (st
                x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
        }
 }
-#else
-void __init MP_processor_info (struct mpc_config_processor *m)
-{
-       num_processors++;
-}
-#endif /* CONFIG_XEN */
 
 static void __init MP_bus_info (struct mpc_config_bus *m)
 {
@@ -702,7 +695,6 @@ void __init mp_register_lapic (
        if (id == boot_cpu_physical_apicid)
                boot_cpu = 1;
 
-#ifndef CONFIG_XEN
        processor.mpc_type = MP_PROCESSOR;
        processor.mpc_apicid = id;
        processor.mpc_apicver = 0x10; /* TBD: lapic version */
@@ -713,7 +705,6 @@ void __init mp_register_lapic (
        processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
        processor.mpc_reserved[0] = 0;
        processor.mpc_reserved[1] = 0;
-#endif
 
        MP_processor_info(&processor);
 }
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
===================================================================
--- /dev/null
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+
+#if defined(CONFIG_ACPI_BUS)
+#include <acpi/acpi_bus.h>
+#endif
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+extern struct genapic apic_cluster;
+extern struct genapic apic_flat;
+extern struct genapic apic_xen;
+
+struct genapic *genapic = &apic_xen;
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init clustered_apic_check(void)
+{
+       long i;
+       u8 clusters, max_cluster;
+       u8 id;
+       u8 cluster_cnt[NUM_APIC_CLUSTERS];
+
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+               /* AMD always uses flat mode right now */
+               genapic = &apic_flat;
+               goto print;
+       }
+
+#if defined(CONFIG_ACPI_BUS)
+       /*
+        * Some x86_64 machines use physical APIC mode regardless of how many
+        * procs/clusters are present (x86_64 ES7000 is an example).
+        */
+       if (acpi_fadt.revision > FADT2_REVISION_ID)
+               if (acpi_fadt.force_apic_physical_destination_mode) {
+                       genapic = &apic_cluster;
+                       goto print;
+               }
+#endif
+
+       memset(cluster_cnt, 0, sizeof(cluster_cnt));
+
+       for (i = 0; i < NR_CPUS; i++) {
+               id = bios_cpu_apicid[i];
+               if (id != BAD_APICID)
+                       cluster_cnt[APIC_CLUSTERID(id)]++;
+       }
+
+       clusters = 0;
+       max_cluster = 0;
+       for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+               if (cluster_cnt[i] > 0) {
+                       ++clusters;
+                       if (cluster_cnt[i] > max_cluster)
+                               max_cluster = cluster_cnt[i];
+               }
+       }
+
+       /*
+        * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+        * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+        * else physical mode.
+        * (We don't use lowest priority delivery + HW APIC IRQ steering, so
+        * can ignore the clustered logical case and go straight to physical.)
+        */
+       if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+               genapic = &apic_flat;
+       else
+               genapic = &apic_cluster;
+
+print:
+       /* hardcode to xen apic functions */
+       genapic = &apic_xen;
+       printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+}
+
+extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector,
+                                  unsigned int dest);
+
+/* Same for both flat and clustered: send @vector to the calling CPU. */
+void send_IPI_self(int vector)
+{
+       xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
===================================================================
--- /dev/null
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Xen APIC subarch code.  Maximum 8 CPUs, logical delivery.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ *
+ * Hacked to pieces for Xen by Chris Wright.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm-xen/evtchn.h>
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+/* Notify the event channel bound to IPI @vector on @cpu; 0 means unbound. */
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+       unsigned int evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+
+       Dprintk("%s\n", __FUNCTION__);
+       if (evtchn) {
+               notify_via_evtchn(evtchn);
+               return;
+       }
+       printk(KERN_WARNING "send_IPI to unbound port %u/%d\n", cpu, vector);
+}
+
+/*
+ * Emulate the APIC destination shorthands with event channels: @shortcut
+ * selects the calling CPU, all online CPUs but the caller, or all online
+ * CPUs; other values are only logged.  @dest is not used.
+ */
+void xen_send_IPI_shortcut(unsigned int shortcut, int vector,
+                           unsigned int dest)
+{
+       int cpu, self;
+
+       switch (shortcut) {
+       case APIC_DEST_SELF:
+               __send_IPI_one(smp_processor_id(), vector);
+               break;
+       case APIC_DEST_ALLBUT:
+               self = smp_processor_id();
+               for_each_online_cpu(cpu) {
+                       if (cpu != self)
+                               __send_IPI_one(cpu, vector);
+               }
+               break;
+       case APIC_DEST_ALLINC:
+               for_each_online_cpu(cpu)
+                       __send_IPI_one(cpu, vector);
+               break;
+       default:
+               printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+                      vector);
+               break;
+       }
+}
+
+static cpumask_t xen_target_cpus(void)
+{
+       return cpu_online_map;  /* genapic target_cpus hook: all online CPUs */
+}
+
+/*
+ * genapic init_apic_ldr hook, where the logical destination ID would be
+ * set up.  It only emits the debug trace; the original note says it is
+ * not called at present.
+ */
+static void xen_init_apic_ldr(void)
+{
+       Dprintk("%s\n", __FUNCTION__);
+}
+
+/*
+ * Send @vector to all other online CPUs; nothing to do when there are none.
+ */
+static void xen_send_IPI_allbutself(int vector)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       if (num_online_cpus() > 1)
+               xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
+                                     APIC_DEST_LOGICAL);
+}
+
+/* Send @vector to every online CPU, the caller included. */
+static void xen_send_IPI_all(int vector)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+/*
+ * Send @vector to each CPU in @cpumask with local interrupts disabled.
+ * Warns if any CPU in the mask (not only in its first word) is offline.
+ */
+static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+       unsigned int cpu;
+       unsigned long flags;
+
+       Dprintk("%s\n", __FUNCTION__);
+       local_irq_save(flags);
+       WARN_ON(!cpus_subset(cpumask, cpu_online_map));
+       for (cpu = 0; cpu < NR_CPUS; ++cpu)
+               if (cpu_isset(cpu, cpumask))
+                       __send_IPI_one(cpu, vector);
+       local_irq_restore(flags);
+}
+
+static int xen_apic_id_registered(void)
+{
+       /* Is this CPU's number marked in phys_cpu_present_map?  It had better be. */
+       Dprintk("%s\n", __FUNCTION__);
+       return physid_isset(smp_processor_id(), phys_cpu_present_map);
+}
+
+static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; /* first mask word only */
+}
+
+/*
+ * Return bits 31:24 of EBX from CPUID leaf 1, shifted right by @index_msb.
+ */
+static unsigned int phys_pkg_id(int index_msb)
+{
+       Dprintk("%s\n", __FUNCTION__);
+       return ((cpuid_ebx(1) >> 24) & 0xFF) >> index_msb;
+}
+
+struct genapic apic_xen =  {    /* genapic hooks implemented in this file */
+       .name = "xen",          /* printed as "Setting APIC routing to xen" */
+       .int_delivery_mode = dest_LowestPrio,
+       .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+       .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+       .target_cpus = xen_target_cpus,                 /* cpu_online_map */
+       .apic_id_registered = xen_apic_id_registered,
+       .init_apic_ldr = xen_init_apic_ldr,             /* debug trace only */
+       .send_IPI_all = xen_send_IPI_all,
+       .send_IPI_allbutself = xen_send_IPI_allbutself,
+       .send_IPI_mask = xen_send_IPI_mask,
+       .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
+       .phys_pkg_id = phys_pkg_id,
+};

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel