* Puthiyaparambil, Aravindh (aravindh.puthiyaparambil@xxxxxxxxxx) wrote:
> Is there any way for us to see an early copy of this patch? I think we
> need to come up with way for apic_xen to accommodate clustered apics too
> or there might be issues running on large systems.
Here's an update from the last copy I made (about one week old I think).
Many thanks to Xin who found my last bug so that 64-bit syscalls actually
worked! I'd run this one quite successfully on dom0 (before refreshing to
a newer Xen snapshot, this actual patch is not more than compile tested).
I believe Xin is continuing on to work on domU (which coincidentally
has apic compilation issues), and may have a more up-to-date patch.
thanks,
-chris
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S
@@ -43,7 +43,6 @@
startup_64:
ENTRY(_start)
cld
- movq init_rsp(%rip),%rsp
/* Copy the necessary stuff from xen_start_info structure. */
movq $xen_start_info_union,%rdi
movq $64,%rcx /* sizeof (union xen_start_info_union) / sizeof
(long) */
@@ -54,6 +53,7 @@ ENTRY(_start)
cld
#endif /* CONFIG_SMP */
+ movq init_rsp(%rip),%rsp
/* zero EFLAGS after setting rsp */
pushq $0
popfq
@@ -140,6 +140,7 @@ ENTRY(cpu_gdt_table)
.quad 0,0 /* TSS */
.quad 0,0 /* LDT */
.quad 0,0,0 /* three TLS descriptors */
+ .quad 0 /* unused now? __KERNEL16_CS - 16bit
PM for S3 wakeup. */
gdt_end:
/* asm/segment.h:GDT_ENTRIES must match this */
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile
@@ -25,10 +25,10 @@ obj-$(CONFIG_ACPI_BOOT) += acpi/
c-obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_X86_CPUID) += cpuid.o
-#obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o
+obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o
c-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
-obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o
+obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o genapic_xen.o
c-obj-$(CONFIG_X86_IO_APIC) += genapic.o genapic_cluster.o genapic_flat.o
#obj-$(CONFIG_PM) += suspend.o
#obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
@@ -54,7 +54,7 @@ intel_cacheinfo-y += ../../../i386/kern
quirks-y += ../../i386/kernel/quirks.o
c-link := init_task.o
-s-link := vsyscall.o
+s-link := vsyscall.o
$(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst
%.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
@ln -fsn $(srctree)/arch/x86_64/kernel/$(notdir $@) $@
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c
@@ -259,13 +259,13 @@ void __init cpu_init (void)
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
*/
+#ifndef CONFIG_XEN
if (cpu) {
memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE);
}
cpu_gdt_descr[cpu].size = GDT_SIZE;
cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu];
-#if 0
asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu]));
asm volatile("lidt %0" :: "m" (idt_descr));
#endif
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c
@@ -599,6 +599,17 @@ static void __init print_memory_map(char
}
}
+void __init smp_alloc_memory(void)
+{
+ int cpu;
+
+ for (cpu = 1; cpu < NR_CPUS; cpu++) {
+ cpu_gdt_descr[cpu].address = (unsigned long)
+ alloc_bootmem_low_pages(PAGE_SIZE);
+ /* XXX free unused pages later */
+ }
+}
+
void __init setup_arch(char **cmdline_p)
{
int i, j;
@@ -738,6 +749,9 @@ void __init setup_arch(char **cmdline_p)
}
}
#endif
+#ifdef CONFIG_SMP
+ smp_alloc_memory();
+#endif
paging_init();
#ifdef CONFIG_X86_LOCAL_APIC
/*
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/xen_entry.S
@@ -8,11 +8,14 @@
#define sizeof_vcpu_shift 3
#ifdef CONFIG_SMP
-#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
-#define preempt_enable(reg) decl threadinfo_preempt_count(reg)
+//#define preempt_disable(reg) incl threadinfo_preempt_count(reg)
+//#define preempt_enable(reg) decl threadinfo_preempt_count(reg)
+#define preempt_disable(reg)
+#define preempt_enable(reg)
#define XEN_GET_VCPU_INFO(reg) preempt_disable(%rbp) ; \
movq %gs:pda_cpunumber,reg ; \
- shl $sizeof_vcpu_shift,reg ; \
+ shl $32, reg ; \
+ shr $32-sizeof_vcpu_shift,reg ; \
addq HYPERVISOR_shared_info,reg
#define XEN_PUT_VCPU_INFO(reg) preempt_enable(%rbp) ; \
#define XEN_PUT_VCPU_INFO_fixup .byte 0xff,0xff,0xff
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/irq.c
@@ -20,7 +20,11 @@
*/
atomic_t irq_err_count;
-
+#ifdef CONFIG_X86_IO_APIC
+#ifdef APIC_MISMATCH_DEBUG
+atomic_t irq_mis_count;
+#endif
+#endif
/*
* Generic, controller-independent functions:
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c
@@ -28,6 +28,9 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
+#include <asm-xen/evtchn.h>
+
+#define xxprint(msg) HYPERVISOR_console_io(CONSOLEIO_write, strlen(msg), msg)
/*
* Smarter SMP flushing macros.
@@ -103,6 +106,7 @@ static inline void leave_mm (unsigned lo
* 2) Leave the mm if we are in the lazy tlb mode.
*/
+#if 0 /* Xen */
asmlinkage void smp_invalidate_interrupt (void)
{
unsigned long cpu;
@@ -251,6 +255,20 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
}
+#else
+asmlinkage void smp_invalidate_interrupt (void)
+{ return; }
+void flush_tlb_current_task(void)
+{ xen_tlb_flush_mask(¤t->mm->cpu_vm_mask); }
+void flush_tlb_mm (struct mm_struct * mm)
+{ xen_tlb_flush_mask(&mm->cpu_vm_mask); }
+void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+{ xen_invlpg_mask(&vma->vm_mm->cpu_vm_mask, va); }
+void flush_tlb_all(void)
+{ xen_tlb_flush_all(); }
+#endif /* Xen */
+
+
void smp_kdb_stop(void)
{
send_IPI_allbutself(KDB_VECTOR);
@@ -310,13 +328,13 @@ static void __smp_call_function (void (*
/* Wait for response */
while (atomic_read(&data.started) != cpus)
- cpu_relax();
+ barrier();
if (!wait)
return;
while (atomic_read(&data.finished) != cpus)
- cpu_relax();
+ barrier();
}
/*
@@ -350,7 +368,11 @@ void smp_stop_cpu(void)
*/
cpu_clear(smp_processor_id(), cpu_online_map);
local_irq_disable();
+#ifdef CONFIG_XEN
+ xxprint("stop_this_cpu disable_local_APIC\n");
+#else
disable_local_APIC();
+#endif
local_irq_enable();
}
@@ -364,8 +386,10 @@ static void smp_really_stop_cpu(void *du
void smp_send_stop(void)
{
int nolock = 0;
+#ifndef CONFIG_XEN
if (reboot_force)
return;
+#endif
/* Don't deadlock on the call lock in panic */
if (!spin_trylock(&call_lock)) {
/* ignore locking because we have paniced anyways */
@@ -376,7 +400,11 @@ void smp_send_stop(void)
spin_unlock(&call_lock);
local_irq_disable();
+#ifdef CONFIG_XEN
+ xxprint("stop_this_cpu disable_local_APIC\n");
+#else
disable_local_APIC();
+#endif
local_irq_enable();
}
@@ -385,18 +413,17 @@ void smp_send_stop(void)
* all the work is done automatically when
* we return from the interrupt.
*/
-asmlinkage void smp_reschedule_interrupt(void)
+asmlinkage irqreturn_t smp_reschedule_interrupt(void)
{
- ack_APIC_irq();
+ return IRQ_HANDLED;
}
-asmlinkage void smp_call_function_interrupt(void)
+asmlinkage irqreturn_t smp_call_function_interrupt(void)
{
void (*func) (void *info) = call_data->func;
void *info = call_data->info;
int wait = call_data->wait;
- ack_APIC_irq();
/*
* Notify initiating CPU that I've grabbed the data and am
* about to execute the function
@@ -413,6 +440,8 @@ asmlinkage void smp_call_function_interr
mb();
atomic_inc(&call_data->finished);
}
+
+ return IRQ_HANDLED;
}
int safe_smp_processor_id(void)
@@ -422,7 +451,10 @@ int safe_smp_processor_id(void)
if (disable_apic)
return 0;
- apicid = hard_smp_processor_id();
+#ifdef CONFIG_XEN
+ return smp_processor_id();
+#endif
+
if (x86_cpu_to_apicid[apicid] == apicid)
return apicid;
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c
@@ -953,6 +953,17 @@ void __init trap_init(void)
cpu_init();
}
+void smp_trap_init(trap_info_t *trap_ctxt)
+{
+ trap_info_t *t = trap_table;
+
+ for (t = trap_table; t->address; t++) {
+ trap_ctxt[t->vector].flags = t->flags;
+ trap_ctxt[t->vector].cs = t->cs;
+ trap_ctxt[t->vector].address = t->address;
+ }
+}
+
/* Actual parsing is done early in setup.c. */
static int __init oops_dummy(char *s)
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c
@@ -47,6 +47,7 @@
#include <linux/bootmem.h>
#include <linux/thread_info.h>
#include <linux/module.h>
+#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
@@ -57,6 +58,8 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/nmi.h>
+#include <asm/mpspec.h>
+#include <asm/arch_hooks.h>
/* Change for real CPU hotplug. Note other files need to be fixed
first too. */
@@ -96,6 +99,7 @@ cpumask_t cpu_sibling_map[NR_CPUS] __cac
cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_core_map);
+#ifndef CONFIG_XEN
/*
* Trampoline 80x86 program as an array.
*/
@@ -115,6 +119,7 @@ static unsigned long __cpuinit setup_tra
memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
return virt_to_phys(tramp);
}
+#endif
/*
* The bootstrap kernel entry code has set these up. Save them for
@@ -130,6 +135,7 @@ static void __cpuinit smp_store_cpu_info
print_cpu_info(c);
}
+#ifndef CONFIG_XEN
/*
* New Funky TSC sync algorithm borrowed from IA64.
* Main advantage is that it doesn't reset the TSCs fully and
@@ -331,6 +337,7 @@ static __init int notscsync_setup(char *
return 0;
}
__setup("notscsync", notscsync_setup);
+#endif
static atomic_t init_deasserted __cpuinitdata;
@@ -343,6 +350,7 @@ void __cpuinit smp_callin(void)
int cpuid, phys_id;
unsigned long timeout;
+#ifndef CONFIG_XEN
/*
* If waken up by an INIT in an 82489DX configuration
* we may get here before an INIT-deassert IPI reaches
@@ -352,10 +360,11 @@ void __cpuinit smp_callin(void)
while (!atomic_read(&init_deasserted))
cpu_relax();
+#endif
/*
* (This works even if the APIC is not enabled.)
*/
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ phys_id = smp_processor_id();
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
@@ -389,6 +398,7 @@ void __cpuinit smp_callin(void)
cpuid);
}
+#ifndef CONFIG_XEN
/*
* the boot CPU has finished the init stage and is spinning
* on callin_map until we finish. We are free to set up this
@@ -398,6 +408,7 @@ void __cpuinit smp_callin(void)
Dprintk("CALLIN, before setup_local_APIC().\n");
setup_local_APIC();
+#endif
/*
* Get our bogomips.
@@ -405,7 +416,9 @@ void __cpuinit smp_callin(void)
calibrate_delay();
Dprintk("Stack at about %p\n",&cpuid);
+#ifndef CONFIG_XEN
disable_APIC_timer();
+#endif
/*
* Save our processor parameters
@@ -418,6 +431,26 @@ void __cpuinit smp_callin(void)
cpu_set(cpuid, cpu_callin_map);
}
+static irqreturn_t ldebug_interrupt(int irq, void *dev_id, struct pt_regs
*regs)
+{
+ return IRQ_HANDLED;
+}
+
+static DEFINE_PER_CPU(int, ldebug_irq);
+static char ldebug_name[NR_CPUS][15];
+
+void ldebug_setup(void)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(ldebug_irq, cpu) = bind_virq_to_irq(VIRQ_DEBUG);
+ sprintf(ldebug_name[cpu], "ldebug%d", cpu);
+ BUG_ON(request_irq(per_cpu(ldebug_irq, cpu), ldebug_interrupt,
+ SA_INTERRUPT, ldebug_name[cpu], NULL));
+}
+
+extern void local_setup_timer(void);
+
/*
* Setup code on secondary processor (after comming out of the trampoline)
*/
@@ -434,6 +467,7 @@ void __cpuinit start_secondary(void)
/* otherwise gcc will move up the smp_processor_id before the cpu_init
*/
barrier();
+#ifndef CONFIG_XEN
Dprintk("cpu %d: setting up apic clock\n", smp_processor_id());
setup_secondary_APIC_clock();
@@ -446,6 +480,12 @@ void __cpuinit start_secondary(void)
}
enable_APIC_timer();
+#else
+ local_setup_timer();
+ ldebug_setup();
+ smp_intr_init();
+ local_irq_enable();
+#endif
/*
* Allow the master to continue.
@@ -453,10 +493,12 @@ void __cpuinit start_secondary(void)
cpu_set(smp_processor_id(), cpu_online_map);
mb();
+#ifndef CONFIG_XEN
/* Wait for TSC sync to not schedule things before.
We still process interrupts, which could see an inconsistent
time in that window unfortunately. */
tsc_sync_wait();
+#endif
cpu_idle();
}
@@ -502,6 +544,7 @@ static void inquire_remote_apic(int apic
}
#endif
+#ifndef CONFIG_XEN
/*
* Kick the secondary to wake up.
*/
@@ -627,6 +670,7 @@ static int __cpuinit wakeup_secondary_vi
return (send_status | accept_status);
}
+#endif
/*
* Boot one CPU.
@@ -637,6 +681,14 @@ static int __cpuinit do_boot_cpu(int cpu
unsigned long boot_error;
int timeout;
unsigned long start_rip;
+#ifdef CONFIG_XEN
+ vcpu_guest_context_t ctxt;
+ extern void startup_64_smp(void);
+ extern void hypervisor_callback(void);
+ extern void failsafe_callback(void);
+ extern void smp_trap_init(trap_info_t *);
+ int i;
+#endif
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
@@ -649,7 +701,7 @@ static int __cpuinit do_boot_cpu(int cpu
cpu_pda[cpu].pcurrent = idle;
- start_rip = setup_trampoline();
+ start_rip = (unsigned long)startup_64_smp;
init_rsp = idle->thread.rsp;
per_cpu(init_tss,cpu).rsp0 = init_rsp;
@@ -666,6 +718,95 @@ static int __cpuinit do_boot_cpu(int cpu
atomic_set(&init_deasserted, 0);
+#ifdef CONFIG_XEN
+ if (cpu_gdt_descr[0].size > PAGE_SIZE)
+ BUG();
+ cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
+ memcpy((void *)cpu_gdt_descr[cpu].address,
+ (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
+
+ memset(&ctxt, 0, sizeof(ctxt));
+
+ ctxt.flags = VGCF_IN_KERNEL;
+ ctxt.user_regs.ds = __USER_DS;
+ ctxt.user_regs.es = __USER_DS;
+ ctxt.user_regs.fs = 0;
+ ctxt.user_regs.gs = 0;
+ ctxt.user_regs.ss = __KERNEL_DS|0x3;
+ ctxt.user_regs.cs = __KERNEL_CS|0x3;
+ ctxt.user_regs.rip = start_rip;
+ ctxt.user_regs.rsp = idle->thread.rsp;
+ ctxt.user_regs.eflags = (1<<9) | (1<<2) | (idle->thread.io_pl<<12);
+
+ /* FPU is set up to default initial state. */
+ memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+ /* Virtual IDT is empty at start-of-day. */
+ for ( i = 0; i < 256; i++ )
+ {
+ ctxt.trap_ctxt[i].vector = i;
+ ctxt.trap_ctxt[i].cs = FLAT_KERNEL_CS;
+ }
+ smp_trap_init(ctxt.trap_ctxt);
+
+ /* No LDT. */
+ ctxt.ldt_ents = 0;
+
+ {
+ unsigned long va;
+ int f;
+
+ for (va = cpu_gdt_descr[cpu].address, f = 0;
+ va < cpu_gdt_descr[cpu].address + cpu_gdt_descr[cpu].size;
+ va += PAGE_SIZE, f++) {
+ ctxt.gdt_frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
+ make_page_readonly((void *)va);
+ }
+ ctxt.gdt_ents = GDT_ENTRIES;
+ }
+
+ /* Ring 1 stack is the initial stack. */
+ ctxt.kernel_ss = __KERNEL_DS;
+ ctxt.kernel_sp = idle->thread.rsp;
+
+ /* Callback handlers. */
+ ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+ ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+ ctxt.syscall_callback_eip = (unsigned long)system_call;
+
+ ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(init_level4_pgt);
+
+ boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
+
+ if (!boot_error) {
+ /*
+ * allow APs to start initializing.
+ */
+ Dprintk("Before Callout %d.\n", cpu);
+ cpu_set(cpu, cpu_callout_map);
+ Dprintk("After Callout %d.\n", cpu);
+
+ /*
+ * Wait 5s total for a response
+ */
+ for (timeout = 0; timeout < 50000; timeout++) {
+ if (cpu_isset(cpu, cpu_callin_map))
+ break; /* It has booted */
+ udelay(100);
+ }
+
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ /* number CPUs logically, starting from 1 (BSP is 0) */
+ Dprintk("OK.\n");
+ printk("CPU%d: ", cpu);
+ print_cpu_info(&cpu_data[cpu]);
+ Dprintk("CPU has booted.\n");
+ } else {
+ boot_error= 1;
+ }
+ }
+ x86_cpu_to_apicid[cpu] = apicid;
+#else
Dprintk("Setting warm reset code and vector.\n");
CMOS_WRITE(0xa, 0xf);
@@ -729,6 +870,7 @@ static int __cpuinit do_boot_cpu(int cpu
#endif
}
}
+#endif
if (boot_error) {
cpu_clear(cpu, cpu_callout_map); /* was set here
(do_boot_cpu()) */
clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
@@ -857,11 +999,13 @@ static __cpuinit void enforce_max_cpus(u
*/
static int __cpuinit smp_sanity_check(unsigned max_cpus)
{
+#ifndef CONFIG_XEN
if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
hard_smp_processor_id());
physid_set(hard_smp_processor_id(), phys_cpu_present_map);
}
+#endif
/*
* If we couldn't find an SMP configuration at boot time,
@@ -876,6 +1020,7 @@ static int __cpuinit smp_sanity_check(un
return -1;
}
+#ifndef CONFIG_XEN
/*
* Should not be necessary because the MP table should list the boot
* CPU too, but we do it for the sake of robustness anyway.
@@ -896,16 +1041,17 @@ static int __cpuinit smp_sanity_check(un
nr_ioapics = 0;
return -1;
}
+#endif
/*
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
+ HYPERVISOR_shared_info->n_vcpu = 1;
printk(KERN_INFO "SMP mode deactivated, forcing use of dummy
APIC emulation.\n");
nr_ioapics = 0;
return -1;
}
-
return 0;
}
@@ -928,7 +1074,7 @@ void __cpuinit smp_prepare_cpus(unsigned
*/
for (i = 0; i < NR_CPUS; i++) {
int apicid = cpu_present_to_apicid(i);
- if (physid_isset(apicid, phys_cpu_present_map)) {
+ if (i < HYPERVISOR_shared_info->n_vcpu) {
cpu_set(i, cpu_present_map);
/* possible map would be different if we supported real
CPU hotplug. */
@@ -942,7 +1088,9 @@ void __cpuinit smp_prepare_cpus(unsigned
return;
}
+ smp_intr_init();
+#ifndef CONFIG_XEN
/*
* Switch from PIC to APIC mode.
*/
@@ -954,6 +1102,7 @@ void __cpuinit smp_prepare_cpus(unsigned
GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
/* Or can we switch back to PIC here? */
}
+#endif
/*
* Now start the IO-APICs
@@ -967,7 +1116,9 @@ void __cpuinit smp_prepare_cpus(unsigned
* Set up local APIC timer on boot CPU.
*/
+#ifndef CONFIG_XEN
setup_boot_APIC_clock();
+#endif
}
/*
@@ -1021,6 +1172,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
*/
void __cpuinit smp_cpus_done(unsigned int max_cpus)
{
+#ifndef CONFIG_XEN
zap_low_mappings();
smp_cleanup_boot();
@@ -1028,8 +1180,60 @@ void __cpuinit smp_cpus_done(unsigned in
setup_ioapic_dest();
#endif
- detect_siblings();
time_init_gtod();
check_nmi_watchdog();
+#endif
+ detect_siblings();
+}
+
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
+
+static DEFINE_PER_CPU(int, resched_irq);
+static DEFINE_PER_CPU(int, callfunc_irq);
+static char resched_name[NR_CPUS][15];
+static char callfunc_name[NR_CPUS][15];
+
+void smp_intr_init(void)
+{
+ int cpu = smp_processor_id();
+
+ per_cpu(resched_irq, cpu) =
+ bind_ipi_to_irq(RESCHEDULE_VECTOR);
+ sprintf(resched_name[cpu], "resched%d", cpu);
+ BUG_ON(request_irq(per_cpu(resched_irq, cpu), smp_reschedule_interrupt,
+ SA_INTERRUPT, resched_name[cpu], NULL));
+
+ per_cpu(callfunc_irq, cpu) =
+ bind_ipi_to_irq(CALL_FUNCTION_VECTOR);
+ sprintf(callfunc_name[cpu], "callfunc%d", cpu);
+ BUG_ON(request_irq(per_cpu(callfunc_irq, cpu),
+ smp_call_function_interrupt,
+ SA_INTERRUPT, callfunc_name[cpu], NULL));
+}
+
+static void smp_intr_exit(void)
+{
+ int cpu = smp_processor_id();
+
+ free_irq(per_cpu(resched_irq, cpu), NULL);
+ unbind_ipi_from_irq(RESCHEDULE_VECTOR);
+
+ free_irq(per_cpu(callfunc_irq, cpu), NULL);
+ unbind_ipi_from_irq(CALL_FUNCTION_VECTOR);
+}
+
+void smp_suspend(void)
+{
+ /* XXX todo: take down time and ipi's on all cpus */
+ local_teardown_timer_irq();
+ smp_intr_exit();
+}
+
+void smp_resume(void)
+{
+ /* XXX todo: restore time and ipi's on all cpus */
+ smp_intr_init();
+ local_setup_timer_irq();
}
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
===================================================================
--- xen-unstable.orig/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c
@@ -105,7 +105,6 @@ static int __init mpf_checksum(unsigned
return sum & 0xFF;
}
-#ifndef CONFIG_XEN
static void __init MP_processor_info (struct mpc_config_processor *m)
{
int ver;
@@ -162,12 +161,6 @@ static void __init MP_processor_info (st
x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
}
}
-#else
-void __init MP_processor_info (struct mpc_config_processor *m)
-{
- num_processors++;
-}
-#endif /* CONFIG_XEN */
static void __init MP_bus_info (struct mpc_config_bus *m)
{
@@ -702,7 +695,6 @@ void __init mp_register_lapic (
if (id == boot_cpu_physical_apicid)
boot_cpu = 1;
-#ifndef CONFIG_XEN
processor.mpc_type = MP_PROCESSOR;
processor.mpc_apicid = id;
processor.mpc_apicver = 0x10; /* TBD: lapic version */
@@ -713,7 +705,6 @@ void __init mp_register_lapic (
processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
processor.mpc_reserved[0] = 0;
processor.mpc_reserved[1] = 0;
-#endif
MP_processor_info(&processor);
}
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
===================================================================
--- /dev/null
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/ipi.h>
+
+#if defined(CONFIG_ACPI_BUS)
+#include <acpi/acpi_bus.h>
+#endif
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+EXPORT_SYMBOL(x86_cpu_to_apicid);
+u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
+
+extern struct genapic apic_cluster;
+extern struct genapic apic_flat;
+extern struct genapic apic_xen;
+
+struct genapic *genapic = &apic_xen;
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init clustered_apic_check(void)
+{
+ long i;
+ u8 clusters, max_cluster;
+ u8 id;
+ u8 cluster_cnt[NUM_APIC_CLUSTERS];
+
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ /* AMD always uses flat mode right now */
+ genapic = &apic_flat;
+ goto print;
+ }
+
+#if defined(CONFIG_ACPI_BUS)
+ /*
+ * Some x86_64 machines use physical APIC mode regardless of how many
+ * procs/clusters are present (x86_64 ES7000 is an example).
+ */
+ if (acpi_fadt.revision > FADT2_REVISION_ID)
+ if (acpi_fadt.force_apic_physical_destination_mode) {
+ genapic = &apic_cluster;
+ goto print;
+ }
+#endif
+
+ memset(cluster_cnt, 0, sizeof(cluster_cnt));
+
+ for (i = 0; i < NR_CPUS; i++) {
+ id = bios_cpu_apicid[i];
+ if (id != BAD_APICID)
+ cluster_cnt[APIC_CLUSTERID(id)]++;
+ }
+
+ clusters = 0;
+ max_cluster = 0;
+ for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
+ if (cluster_cnt[i] > 0) {
+ ++clusters;
+ if (cluster_cnt[i] > max_cluster)
+ max_cluster = cluster_cnt[i];
+ }
+ }
+
+ /*
+ * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode,
+ * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical
+ * else physical mode.
+ * (We don't use lowest priority delivery + HW APIC IRQ steering, so
+ * can ignore the clustered logical case and go straight to physical.)
+ */
+ if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster)
+ genapic = &apic_flat;
+ else
+ genapic = &apic_cluster;
+
+print:
+ /* hardcode to xen apic functions */
+ genapic = &apic_xen;
+ printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
+}
+
+/* Same for both flat and clustered. */
+
+extern void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned
int dest);
+
+void send_IPI_self(int vector)
+{
+ xen_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
Index: xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
===================================================================
--- /dev/null
+++ xen-unstable/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/genapic_xen.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Xen APIC subarch code. Maximum 8 CPUs, logical delivery.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ *
+ * Hacked to pieces for Xen by Chris Wright.
+ */
+#include <linux/config.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm-xen/evtchn.h>
+
+DECLARE_PER_CPU(int, ipi_to_evtchn[NR_IPIS]);
+
+static inline void __send_IPI_one(unsigned int cpu, int vector)
+{
+ unsigned int evtchn;
+ Dprintk("%s\n", __FUNCTION__);
+
+ evtchn = per_cpu(ipi_to_evtchn, cpu)[vector];
+ if (evtchn)
+ notify_via_evtchn(evtchn);
+ else
+ printk("send_IPI to unbound port %d/%d", cpu, vector);
+
+}
+
+void xen_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int
dest)
+{
+ int cpu;
+
+ switch (shortcut) {
+ case APIC_DEST_SELF:
+ __send_IPI_one(smp_processor_id(), vector);
+ break;
+ case APIC_DEST_ALLBUT:
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ if (cpu_isset(cpu, cpu_online_map)) {
+ __send_IPI_one(cpu, vector);
+ }
+ }
+ break;
+ case APIC_DEST_ALLINC:
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (cpu_isset(cpu, cpu_online_map)) {
+ __send_IPI_one(cpu, vector);
+ }
+ }
+ break;
+ default:
+ printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
+ vector);
+ break;
+ }
+
+}
+
+static cpumask_t xen_target_cpus(void)
+{
+ return cpu_online_map;
+}
+
+/*
+ * Set up the logical destination ID.
+ * Do nothing, not called now.
+ */
+static void xen_init_apic_ldr(void)
+{
+ Dprintk("%s\n", __FUNCTION__);
+ return;
+}
+
+static void xen_send_IPI_allbutself(int vector)
+{
+ /*
+ * if there are no other CPUs in the system then
+ * we get an APIC send error if we try to broadcast.
+ * thus we have to avoid sending IPIs in this case.
+ */
+ Dprintk("%s\n", __FUNCTION__);
+ if (num_online_cpus() > 1)
+ xen_send_IPI_shortcut(APIC_DEST_ALLBUT, vector,
APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_all(int vector)
+{
+ Dprintk("%s\n", __FUNCTION__);
+ xen_send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
+}
+
+static void xen_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+ unsigned long mask = cpus_addr(cpumask)[0];
+ unsigned int cpu;
+ unsigned long flags;
+
+ Dprintk("%s\n", __FUNCTION__);
+ local_irq_save(flags);
+ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+
+ for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ if (cpu_isset(cpu, cpumask)) {
+ __send_IPI_one(cpu, vector);
+ }
+ }
+ local_irq_restore(flags);
+}
+
+static int xen_apic_id_registered(void)
+{
+ /* better be set */
+ Dprintk("%s\n", __FUNCTION__);
+ return physid_isset(smp_processor_id(), phys_cpu_present_map);
+}
+
+static unsigned int xen_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ Dprintk("%s\n", __FUNCTION__);
+ return cpus_addr(cpumask)[0] & APIC_ALL_CPUS;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+ u32 ebx;
+
+ Dprintk("%s\n", __FUNCTION__);
+ ebx = cpuid_ebx(1);
+ return ((ebx >> 24) & 0xFF) >> index_msb;
+}
+
+struct genapic apic_xen = {
+ .name = "xen",
+ .int_delivery_mode = dest_LowestPrio,
+ .int_dest_mode = (APIC_DEST_LOGICAL != 0),
+ .int_delivery_dest = APIC_DEST_LOGICAL | APIC_DM_LOWEST,
+ .target_cpus = xen_target_cpus,
+ .apic_id_registered = xen_apic_id_registered,
+ .init_apic_ldr = xen_init_apic_ldr,
+ .send_IPI_all = xen_send_IPI_all,
+ .send_IPI_allbutself = xen_send_IPI_allbutself,
+ .send_IPI_mask = xen_send_IPI_mask,
+ .cpu_mask_to_apicid = xen_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+};
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|