# HG changeset patch # User gingold@virtu10 # Node ID b0e9c8cf664a9a52bfe4a98d423d31d3c36e0396 # Parent 7423c81cb3e39063f31bb87ffe1551630fccb1f6 Save & restore. xc_ia64_linux_save.c and xc_ia64_linux_restore.c added. vcpu context has more registers and states (eg: tr registers). Per cpu irqs are deallocated when cpu is switched off. #if/#endif added in reboot.c for ia64. Signed-off-by: Tristan Gingold diff -r 7423c81cb3e3 -r b0e9c8cf664a linux-2.6-xen-sparse/arch/ia64/Kconfig --- a/linux-2.6-xen-sparse/arch/ia64/Kconfig Mon Jul 10 13:59:54 2006 +0200 +++ b/linux-2.6-xen-sparse/arch/ia64/Kconfig Tue Jul 11 09:44:03 2006 +0200 @@ -518,7 +518,7 @@ config XEN_DEVMEM default n config XEN_REBOOT - default n + default y config XEN_SMPBOOT default n diff -r 7423c81cb3e3 -r b0e9c8cf664a linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c --- a/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c Mon Jul 10 13:59:54 2006 +0200 +++ b/linux-2.6-xen-sparse/arch/ia64/kernel/irq_ia64.c Tue Jul 11 09:44:03 2006 +0200 @@ -31,6 +31,9 @@ #include #include #include +#ifdef CONFIG_XEN +#include +#endif #include #include @@ -235,6 +238,9 @@ static struct irqaction ipi_irqaction = #include #include +static DEFINE_PER_CPU(int, timer_irq) = -1; +static DEFINE_PER_CPU(int, ipi_irq) = -1; +static DEFINE_PER_CPU(int, resched_irq) = -1; static char timer_name[NR_CPUS][15]; static char ipi_name[NR_CPUS][15]; static char resched_name[NR_CPUS][15]; @@ -294,6 +300,7 @@ xen_register_percpu_irq (unsigned int ir ret = bind_virq_to_irqhandler(VIRQ_ITC, cpu, action->handler, action->flags, timer_name[cpu], action->dev_id); + per_cpu(timer_irq,cpu) = ret; printk(KERN_INFO "register VIRQ_ITC (%s) to xen irq (%d)\n", timer_name[cpu], ret); break; case IA64_IPI_RESCHEDULE: @@ -301,6 +308,7 @@ xen_register_percpu_irq (unsigned int ir ret = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, cpu, action->handler, action->flags, resched_name[cpu], action->dev_id); + per_cpu(resched_irq,cpu) = ret; printk(KERN_INFO "register RESCHEDULE_VECTOR (%s) to xen irq (%d)\n", resched_name[cpu], ret); break; case IA64_IPI_VECTOR: @@ -308,6 +316,7 @@ xen_register_percpu_irq (unsigned int ir ret = bind_ipi_to_irqhandler(IPI_VECTOR, cpu, action->handler, action->flags, ipi_name[cpu], action->dev_id); + per_cpu(ipi_irq,cpu) = ret; printk(KERN_INFO "register IPI_VECTOR (%s) to xen irq (%d)\n", ipi_name[cpu], ret); break; case IA64_SPURIOUS_INT_VECTOR: @@ -343,7 +352,7 @@ xen_bind_early_percpu_irq (void) */ for (i = 0; i < late_irq_cnt; i++) xen_register_percpu_irq(saved_percpu_irqs[i].irq, - saved_percpu_irqs[i].action, 0); + saved_percpu_irqs[i].action, 0); } /* FIXME: There's no obvious point to check whether slab is ready. So @@ -352,6 +361,38 @@ extern void (*late_time_init)(void); extern void (*late_time_init)(void); extern char xen_event_callback; extern void xen_init_IRQ(void); + +#ifdef CONFIG_HOTPLUG_CPU +static int __devinit unbind_evtchn_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + if (action == CPU_DEAD) { + /* Unregister evtchn. 
*/ + if (per_cpu(ipi_irq,cpu) >= 0) { + unbind_from_irqhandler (per_cpu(ipi_irq, cpu), NULL); + per_cpu(ipi_irq, cpu) = -1; + } + if (per_cpu(resched_irq,cpu) >= 0) { + unbind_from_irqhandler (per_cpu(resched_irq, cpu), + NULL); + per_cpu(resched_irq, cpu) = -1; + } + if (per_cpu(timer_irq,cpu) >= 0) { + unbind_from_irqhandler (per_cpu(timer_irq, cpu), NULL); + per_cpu(timer_irq, cpu) = -1; + } + } + return NOTIFY_OK; +} + +static struct notifier_block unbind_evtchn_notifier = { + .notifier_call = unbind_evtchn_callback, + .priority = 0 +}; +#endif DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]); void xen_smp_intr_init(void) @@ -363,21 +404,22 @@ void xen_smp_intr_init(void) .type = CALLBACKTYPE_event, .address = (unsigned long)&xen_event_callback, }; - static cpumask_t registered_cpumask; - - if (!cpu) + + if (cpu == 0) { + /* Initialization was already done for boot cpu. */ +#ifdef CONFIG_HOTPLUG_CPU + /* Register the notifier only once. */ + register_cpu_notifier(&unbind_evtchn_notifier); +#endif return; + } /* This should be piggyback when setup vcpu guest context */ BUG_ON(HYPERVISOR_callback_op(CALLBACKOP_register, &event)); - if (!cpu_isset(cpu, registered_cpumask)) { - cpu_set(cpu, registered_cpumask); - for (i = 0; i < saved_irq_cnt; i++) - xen_register_percpu_irq(saved_percpu_irqs[i].irq, - saved_percpu_irqs[i].action, - 0); - } + for (i = 0; i < saved_irq_cnt; i++) + xen_register_percpu_irq(saved_percpu_irqs[i].irq, + saved_percpu_irqs[i].action, 0); #endif /* CONFIG_SMP */ } #endif /* CONFIG_XEN */ @@ -388,12 +430,13 @@ register_percpu_irq (ia64_vector vec, st irq_desc_t *desc; unsigned int irq; +#ifdef CONFIG_XEN + if (is_running_on_xen()) + return xen_register_percpu_irq(vec, action, 1); +#endif + for (irq = 0; irq < NR_IRQS; ++irq) if (irq_to_vector(irq) == vec) { -#ifdef CONFIG_XEN - if (is_running_on_xen()) - return xen_register_percpu_irq(vec, action, 1); -#endif desc = irq_descp(irq); desc->status |= IRQ_PER_CPU; desc->handler = &irq_type_ia64_lsapic; @@ -405,6 +448,7 @@ void __init void __init init_IRQ (void) { + printk(KERN_INFO"init_IRQ called from %p\n", __builtin_return_address (0)); #ifdef CONFIG_XEN /* Maybe put into platform_irq_init later */ if (is_running_on_xen()) { diff -r 7423c81cb3e3 -r b0e9c8cf664a linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c --- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Mon Jul 10 13:59:54 2006 +0200 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c Tue Jul 11 09:44:03 2006 +0200 @@ -795,3 +795,13 @@ direct_remap_pfn_range(struct vm_area_st return error; } + +/* Called after suspend, to resume time. */ +void +time_resume(void) +{ + extern void ia64_cpu_local_tick (void); + + /* Just trigger a tick. */ + ia64_cpu_local_tick (); +} diff -r 7423c81cb3e3 -r b0e9c8cf664a linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S --- a/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Mon Jul 10 13:59:54 2006 +0200 +++ b/linux-2.6-xen-sparse/arch/ia64/xen/xensetup.S Tue Jul 11 09:44:03 2006 +0200 @@ -33,3 +33,23 @@ GLOBAL_ENTRY(early_xen_setup) br.ret.sptk.many rp ;; END(early_xen_setup) + +#include + +/* Stub for suspend. + Just force the stacked registers to be written in memory. */ +GLOBAL_ENTRY(HYPERVISOR_suspend) + alloc r20=ar.pfs,0,0,0,0 + mov r14=2 + mov r15=r12 + ;; + /* We don't want to deal with RSE. 
*/ + flushrs + mov r2=__HYPERVISOR_sched_op + st4 [r12]=r14 + ;; + break 0x1000 + ;; + mov ar.pfs=r20 + br.ret.sptk.many b0 +END(HYPERVISOR_suspend) diff -r 7423c81cb3e3 -r b0e9c8cf664a linux-2.6-xen-sparse/drivers/xen/core/reboot.c --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Mon Jul 10 13:59:54 2006 +0200 +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c Tue Jul 11 09:44:03 2006 +0200 @@ -39,6 +39,7 @@ extern void ctrl_alt_del(void); */ #define SHUTDOWN_HALT 4 +#if defined(__i386__) || defined(__x86_64__) void machine_emergency_restart(void) { /* We really want to get pending console data out before we die. */ @@ -60,10 +61,8 @@ void machine_power_off(void) { /* We really want to get pending console data out before we die. */ xencons_force_flush(); -#if defined(__i386__) || defined(__x86_64__) if (pm_power_off) pm_power_off(); -#endif HYPERVISOR_shutdown(SHUTDOWN_poweroff); } @@ -71,7 +70,7 @@ EXPORT_SYMBOL(machine_restart); EXPORT_SYMBOL(machine_restart); EXPORT_SYMBOL(machine_halt); EXPORT_SYMBOL(machine_power_off); - +#endif /****************************************************************************** * Stop/pickle callback handling. @@ -82,6 +81,7 @@ static void __shutdown_handler(void *unu static void __shutdown_handler(void *unused); static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); +#ifdef CONFIG_X86 /* Ensure we run on the idle task page tables so that we will switch page tables before running user space. This is needed on architectures with separate kernel and user page tables @@ -98,25 +98,30 @@ static void switch_idle_mm(void) current->active_mm = &init_mm; mmdrop(mm); } +#endif static int __do_suspend(void *ignore) { - int i, j, k, fpp, err; - + int err; +#if defined(__i386__) || defined(__x86_64__) + int i, j, k, fpp; extern unsigned long max_pfn; extern unsigned long *pfn_to_mfn_frame_list_list; extern unsigned long *pfn_to_mfn_frame_list[]; +#endif extern void time_resume(void); BUG_ON(smp_processor_id() != 0); BUG_ON(in_interrupt()); +#if defined(__i386__) || defined(__x86_64__) if (xen_feature(XENFEAT_auto_translated_physmap)) { printk(KERN_WARNING "Cannot suspend in " "auto_translated_physmap mode.\n"); return -EOPNOTSUPP; } +#endif err = smp_suspend(); if (err) @@ -129,18 +134,24 @@ static int __do_suspend(void *ignore) #ifdef __i386__ kmem_cache_shrink(pgd_cache); #endif +#if defined(__i386__) || defined(__x86_64__) mm_pin_all(); __cli(); +#elif defined (__ia64__) + local_irq_disable(); +#endif preempt_enable(); gnttab_suspend(); +#if defined(__i386__) || defined(__x86_64__) HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; clear_fixmap(FIX_SHARED_INFO); xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn); +#endif /* * We'll stop somewhere inside this hypercall. 
When it returns, @@ -150,6 +161,7 @@ static int __do_suspend(void *ignore) shutting_down = SHUTDOWN_INVALID; +#if defined(__i386__) || defined(__x86_64__) set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); @@ -171,6 +183,7 @@ static int __do_suspend(void *ignore) virt_to_mfn(&phys_to_machine_mapping[i]); } HYPERVISOR_shared_info->arch.max_pfn = max_pfn; +#endif gnttab_resume(); @@ -178,9 +191,13 @@ static int __do_suspend(void *ignore) time_resume(); +#if defined(__i386__) || defined(__x86_64__) switch_idle_mm(); __sti(); +#elif defined (__ia64__) + local_irq_enable(); +#endif xencons_resume(); diff -r 7423c81cb3e3 -r b0e9c8cf664a linux-2.6-xen-sparse/include/asm-ia64/hypercall.h --- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Mon Jul 10 13:59:54 2006 +0200 +++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h Tue Jul 11 09:44:03 2006 +0200 @@ -302,23 +302,7 @@ HYPERVISOR_vcpu_op( return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } -static inline int -HYPERVISOR_suspend( - unsigned long srec) -{ - struct sched_shutdown sched_shutdown = { - .reason = SHUTDOWN_suspend - }; - - int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, - &sched_shutdown, srec); - - if (rc == -ENOSYS) - rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); - - return rc; -} +extern int HYPERVISOR_suspend(unsigned long srec); static inline int HYPERVISOR_callback_op( diff -r 7423c81cb3e3 -r b0e9c8cf664a tools/libxc/ia64/Makefile --- a/tools/libxc/ia64/Makefile Mon Jul 10 13:59:54 2006 +0200 +++ b/tools/libxc/ia64/Makefile Tue Jul 11 09:44:03 2006 +0200 @@ -1,3 +1,5 @@ CTRL_SRCS-$(CONFIG_IA64) += ia64/xc_ia64 CTRL_SRCS-$(CONFIG_IA64) += ia64/xc_ia64_stubs.c GUEST_SRCS-$(CONFIG_IA64) += ia64/xc_ia64_hvm_build.c +GUEST_SRCS-$(CONFIG_IA64) += ia64/xc_ia64_linux_save.c +GUEST_SRCS-$(CONFIG_IA64) += ia64/xc_ia64_linux_restore.c diff -r 7423c81cb3e3 -r b0e9c8cf664a tools/libxc/ia64/xc_ia64_stubs.c --- a/tools/libxc/ia64/xc_ia64_stubs.c Mon Jul 10 13:59:54 2006 +0200 +++ b/tools/libxc/ia64/xc_ia64_stubs.c Tue Jul 11 09:44:03 2006 +0200 @@ -20,22 +20,6 @@ unsigned long xc_ia64_fpsr_default(void) unsigned long xc_ia64_fpsr_default(void) { return FPSR_DEFAULT; -} - -int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, - uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */, - int (*suspend)(int domid)) -{ - PERROR("xc_linux_save not implemented\n"); - return -1; -} - -int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pfns, - unsigned int store_evtchn, unsigned long *store_mfn, - unsigned int console_evtchn, unsigned long *console_mfn) -{ - PERROR("xc_linux_restore not implemented\n"); - return -1; } /* diff -r 7423c81cb3e3 -r b0e9c8cf664a tools/libxc/xc_linux_build.c --- a/tools/libxc/xc_linux_build.c Mon Jul 10 13:59:54 2006 +0200 +++ b/tools/libxc/xc_linux_build.c Tue Jul 11 09:44:03 2006 +0200 @@ -498,6 +498,7 @@ static int setup_guest(int xc_handle, start_info_mpa = (nr_pages - 3) << PAGE_SHIFT; /* Build firmware. 
*/ + memset (&op.u.domain_setup, 0, sizeof (op.u.domain_setup)); op.u.domain_setup.flags = 0; op.u.domain_setup.domain = (domid_t)dom; op.u.domain_setup.bp = start_info_mpa + sizeof (start_info_t); diff -r 7423c81cb3e3 -r b0e9c8cf664a xen/arch/ia64/xen/dom0_ops.c --- a/xen/arch/ia64/xen/dom0_ops.c Mon Jul 10 13:59:54 2006 +0200 +++ b/xen/arch/ia64/xen/dom0_ops.c Tue Jul 11 09:44:03 2006 +0200 @@ -224,19 +224,43 @@ long arch_do_dom0_op(dom0_op_t *op, XEN_ break; } - if (ds->flags & XEN_DOMAINSETUP_hvm_guest) { - if (!vmx_enabled) { - printk("No VMX hardware feature for vmx domain.\n"); - ret = -EINVAL; - break; - } - d->arch.is_vti = 1; - vmx_setup_platform(d); + if (ds->flags & XEN_DOMAINSETUP_query) { + /* Set flags. */ + if (d->arch.is_vti) + ds->flags |= XEN_DOMAINSETUP_hvm_guest; + /* Set params. */ + ds->bp = 0; /* unknown. */ + ds->maxmem = 0; /* unknown. */ + ds->xsi_va = d->arch.shared_info_va; + ds->hypercall_imm = d->arch.breakimm; + /* Copy back. */ + if ( copy_to_guest(u_dom0_op, op, 1) ) + ret = -EFAULT; } else { - build_physmap_table(d); - dom_fw_setup(d, ds->bp, ds->maxmem); - } + if (ds->flags & XEN_DOMAINSETUP_hvm_guest) { + if (!vmx_enabled) { + printk("No VMX hardware feature for vmx domain.\n"); + ret = -EINVAL; + break; + } + d->arch.is_vti = 1; + vmx_setup_platform(d); + } + else { + build_physmap_table(d); + dom_fw_setup(d, ds->bp, ds->maxmem); + if (ds->xsi_va) + d->arch.shared_info_va = ds->xsi_va; + if (ds->hypercall_imm) { + struct vcpu *v; + d->arch.breakimm = ds->hypercall_imm; + for_each_vcpu (d, v) + v->arch.breakimm = d->arch.breakimm; + } + } + } + put_domain(d); } break; diff -r 7423c81cb3e3 -r b0e9c8cf664a xen/arch/ia64/xen/domain.c --- a/xen/arch/ia64/xen/domain.c Mon Jul 10 13:59:54 2006 +0200 +++ b/xen/arch/ia64/xen/domain.c Tue Jul 11 09:44:03 2006 +0200 @@ -396,8 +396,28 @@ void arch_domain_destroy(struct domain * void arch_getdomaininfo_ctxt(struct vcpu *v, struct vcpu_guest_context *c) { + int i; + struct vcpu_extra_regs *er = &c->extra_regs; + c->user_regs = *vcpu_regs (v); c->privregs_pfn = virt_to_maddr(v->arch.privregs) >> PAGE_SHIFT; + + /* Fill extra regs. 
*/ + for (i = 0; i < 8; i++) { + er->itrs[i].pte = v->arch.itrs[i].pte.val; + er->itrs[i].itir = v->arch.itrs[i].itir; + er->itrs[i].vadr = v->arch.itrs[i].vadr; + er->itrs[i].rid = v->arch.itrs[i].rid; + } + for (i = 0; i < 8; i++) { + er->dtrs[i].pte = v->arch.dtrs[i].pte.val; + er->dtrs[i].itir = v->arch.dtrs[i].itir; + er->dtrs[i].vadr = v->arch.dtrs[i].vadr; + er->dtrs[i].rid = v->arch.dtrs[i].rid; + } + er->event_callback_ip = v->arch.event_callback_ip; + er->dcr = v->arch.dcr; + er->iva = v->arch.iva; } int arch_set_info_guest(struct vcpu *v, struct vcpu_guest_context *c) @@ -412,6 +432,28 @@ int arch_set_info_guest(struct vcpu *v, regs->cr_ipsr |= 2UL << IA64_PSR_CPL0_BIT; regs->ar_rsc |= (2 << 2); /* force PL2/3 */ } + + if (c->flags & VGCF_EXTRA_REGS) { + int i; + struct vcpu_extra_regs *er = &c->extra_regs; + + for (i = 0; i < 8; i++) { + vcpu_set_itr (v, i, er->itrs[i].pte, + er->itrs[i].itir, + er->itrs[i].vadr, + er->itrs[i].rid); + } + for (i = 0; i < 8; i++) { + vcpu_set_dtr (v, i, + er->dtrs[i].pte, + er->dtrs[i].itir, + er->dtrs[i].vadr, + er->dtrs[i].rid); + } + v->arch.event_callback_ip = er->event_callback_ip; + v->arch.dcr = er->dcr; + v->arch.iva = er->iva; + } if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) return 0; diff -r 7423c81cb3e3 -r b0e9c8cf664a xen/arch/ia64/xen/vcpu.c --- a/xen/arch/ia64/xen/vcpu.c Mon Jul 10 13:59:54 2006 +0200 +++ b/xen/arch/ia64/xen/vcpu.c Tue Jul 11 09:44:03 2006 +0200 @@ -1880,13 +1880,15 @@ IA64FAULT vcpu_set_pkr(VCPU *vcpu, UINT6 VCPU translation register access routines **************************************************************************/ -static void vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa) +static void vcpu_set_tr_entry_rid(TR_ENTRY *trp, + UINT64 pte, UINT64 itir, + UINT64 ifa, UINT64 rid) { UINT64 ps; union pte_flags new_pte; trp->itir = itir; - trp->rid = VCPU(current,rrs[ifa>>61]) & RR_RID_MASK; + trp->rid = rid; ps = trp->ps; new_pte.val = pte; if (new_pte.pl < 2) new_pte.pl = 2; @@ -1900,8 +1902,15 @@ static void vcpu_set_tr_entry(TR_ENTRY * trp->pte.val = new_pte.val; } +static inline void +vcpu_set_tr_entry(TR_ENTRY *trp, UINT64 pte, UINT64 itir, UINT64 ifa) +{ + vcpu_set_tr_entry_rid (trp, pte, itir, ifa, + VCPU(current,rrs[ifa>>61]) & RR_RID_MASK); +} + IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 slot, UINT64 pte, - UINT64 itir, UINT64 ifa) + UINT64 itir, UINT64 ifa) { TR_ENTRY *trp; @@ -1914,7 +1923,7 @@ IA64FAULT vcpu_itr_d(VCPU *vcpu, UINT64 } IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 slot, UINT64 pte, - UINT64 itir, UINT64 ifa) + UINT64 itir, UINT64 ifa) { TR_ENTRY *trp; @@ -1923,6 +1932,44 @@ IA64FAULT vcpu_itr_i(VCPU *vcpu, UINT64 //printf("***** itr.i: setting slot %d: ifa=%p\n",slot,ifa); vcpu_set_tr_entry(trp,pte,itir,ifa); vcpu_quick_region_set(PSCBX(vcpu,itr_regions),ifa); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_itr(VCPU *vcpu, u64 slot, u64 pte, + u64 itir, u64 ifa, u64 rid) +{ + TR_ENTRY *trp; + + if (slot >= NITRS) + return IA64_RSVDREG_FAULT; + trp = &PSCBX(vcpu,itrs[slot]); + vcpu_set_tr_entry_rid(trp,pte,itir,ifa, rid); + + /* Recompute the itr_region. 
*/ + vcpu->arch.itr_regions = 0; + for (trp = vcpu->arch.itrs; trp < &vcpu->arch.itrs[NITRS]; trp++) + if (trp->pte.p) + vcpu_quick_region_set(vcpu->arch.itr_regions, + trp->vadr); + return IA64_NO_FAULT; +} + +IA64FAULT vcpu_set_dtr(VCPU *vcpu, u64 slot, u64 pte, + u64 itir, u64 ifa, u64 rid) +{ + TR_ENTRY *trp; + + if (slot >= NDTRS) + return IA64_RSVDREG_FAULT; + trp = &PSCBX(vcpu,dtrs[slot]); + vcpu_set_tr_entry_rid(trp,pte,itir,ifa, rid); + + /* Recompute the dtr_region. */ + vcpu->arch.dtr_regions = 0; + for (trp = vcpu->arch.dtrs; trp < &vcpu->arch.dtrs[NDTRS]; trp++) + if (trp->pte.p) + vcpu_quick_region_set(vcpu->arch.dtr_regions, + trp->vadr); return IA64_NO_FAULT; } @@ -2147,7 +2194,6 @@ IA64FAULT vcpu_ptr_i(VCPU *vcpu,UINT64 v vcpu_quick_region_set(vcpu->arch.itr_regions, trp->vadr); - vcpu_flush_tlb_vhpt_range (vadr, log_range); return IA64_NO_FAULT; diff -r 7423c81cb3e3 -r b0e9c8cf664a xen/include/asm-ia64/vcpu.h --- a/xen/include/asm-ia64/vcpu.h Mon Jul 10 13:59:54 2006 +0200 +++ b/xen/include/asm-ia64/vcpu.h Tue Jul 11 09:44:03 2006 +0200 @@ -162,6 +162,11 @@ extern UINT64 vcpu_get_tmp(VCPU *, UINT6 extern UINT64 vcpu_get_tmp(VCPU *, UINT64); extern void vcpu_set_tmp(VCPU *, UINT64, UINT64); +extern IA64FAULT vcpu_set_dtr(VCPU *vcpu, u64 slot, + u64 pte, u64 itir, u64 ifa, u64 rid); +extern IA64FAULT vcpu_set_itr(VCPU *vcpu, u64 slot, + u64 pte, u64 itir, u64 ifa, u64 rid); + /* Initialize vcpu regs. */ extern void vcpu_init_regs (struct vcpu *v); diff -r 7423c81cb3e3 -r b0e9c8cf664a xen/include/public/arch-ia64.h --- a/xen/include/public/arch-ia64.h Mon Jul 10 13:59:54 2006 +0200 +++ b/xen/include/public/arch-ia64.h Tue Jul 11 09:44:03 2006 +0200 @@ -302,10 +302,27 @@ typedef struct arch_shared_info arch_sha typedef unsigned long xen_callback_t; +struct ia64_tr_entry { + unsigned long pte; + unsigned long itir; + unsigned long vadr; + unsigned long rid; +}; + +struct vcpu_extra_regs { + struct ia64_tr_entry itrs[8]; + struct ia64_tr_entry dtrs[8]; + unsigned long iva; + unsigned long dcr; + unsigned long event_callback_ip; +}; + struct vcpu_guest_context { +#define VGCF_EXTRA_REGS (1<<1) /* Get/Set extra regs. */ unsigned long flags; /* VGCF_* flags */ struct cpu_user_regs user_regs; + struct vcpu_extra_regs extra_regs; unsigned long privregs_pfn; }; typedef struct vcpu_guest_context vcpu_guest_context_t; diff -r 7423c81cb3e3 -r b0e9c8cf664a xen/include/public/dom0_ops.h --- a/xen/include/public/dom0_ops.h Mon Jul 10 13:59:54 2006 +0200 +++ b/xen/include/public/dom0_ops.h Tue Jul 11 09:44:03 2006 +0200 @@ -518,12 +518,16 @@ DEFINE_XEN_GUEST_HANDLE(dom0_hypercall_i #define DOM0_DOMAIN_SETUP 49 #define _XEN_DOMAINSETUP_hvm_guest 0 #define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) +#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ +#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query) typedef struct dom0_domain_setup { domid_t domain; /* domain to be affected */ unsigned long flags; /* XEN_DOMAINSETUP_* */ #ifdef __ia64__ unsigned long bp; /* mpaddr of boot param area */ unsigned long maxmem; /* Highest memory address for MDT. */ + unsigned long xsi_va; /* Xen shared_info area virtual address. */ + unsigned int hypercall_imm; /* Break imm for Xen hypercalls. 
*/ #endif } dom0_domain_setup_t; DEFINE_XEN_GUEST_HANDLE(dom0_domain_setup_t); diff -r 7423c81cb3e3 -r b0e9c8cf664a tools/libxc/ia64/xc_ia64_linux_restore.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Tue Jul 11 09:44:03 2006 +0200 @@ -0,0 +1,319 @@ +/****************************************************************************** + * xc_ia64_linux_restore.c + * + * Restore the state of a Linux session. + * + * Copyright (c) 2003, K A Fraser. + * Rewritten for ia64 by Tristan Gingold + */ + +#include +#include + +#include "xg_private.h" + +#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10)) + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +static ssize_t +read_exact(int fd, void *buf, size_t count) +{ + int r = 0, s; + unsigned char *b = buf; + + while (r < count) { + s = read(fd, &b[r], count - r); + if ((s == -1) && (errno == EINTR)) + continue; + if (s <= 0) { + break; + } + r += s; + } + + return (r == count) ? 1 : 0; +} + +static int +read_page (int xc_handle, int io_fd, uint32_t dom, unsigned long pfn) +{ + void *mem; + + mem = xc_map_foreign_range + (xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, pfn); + if (mem == NULL) { + ERR("cannot map page"); + return -1; + } + if (!read_exact(io_fd, mem, PAGE_SIZE)) { + ERR("Error when reading from state file (5)"); + return -1; + } + munmap(mem, PAGE_SIZE); + return 0; +} + +int xc_linux_restore(int xc_handle, int io_fd, + uint32_t dom, unsigned long nr_pfns, + unsigned int store_evtchn, unsigned long *store_mfn, + unsigned int console_evtchn, unsigned long *console_mfn) +{ + DECLARE_DOM0_OP; + int rc = 1, i; + unsigned long mfn, pfn; + unsigned long ver; + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ + shared_info_t *shared_info = (shared_info_t *)shared_info_page; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + unsigned long *page_array = NULL; + + /* A temporary mapping of the guest's start_info page. */ + start_info_t *start_info; + + max_pfn = nr_pfns; + + DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn); + + + if (!read_exact(io_fd, &ver, sizeof(unsigned long))) { + ERR("Error when reading version"); + goto out; + } + if (ver != 1) { + ERR("version of save doesn't match"); + goto out; + } + + if (mlock(&ctxt, sizeof(ctxt))) { + /* needed for build dom0 op, but might as well do early */ + ERR("Unable to mlock ctxt"); + return 1; + } + + /* Get the domain's shared-info frame. */ + op.cmd = DOM0_GETDOMAININFO; + op.u.getdomaininfo.domain = (domid_t)dom; + if (xc_dom0_op(xc_handle, &op) < 0) { + ERR("Could not get information on new domain"); + goto out; + } + shared_info_frame = op.u.getdomaininfo.shared_info_frame; + + if(xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) { + errno = ENOMEM; + goto out; + } + + if(xc_domain_memory_increase_reservation( + xc_handle, dom, max_pfn, 0, 0, NULL) != 0) { + ERR("Failed to increase reservation by %ld KB", PFN_TO_KB(max_pfn)); + errno = ENOMEM; + goto out; + } + + DPRINTF("Increased domain reservation by %ld KB\n", PFN_TO_KB(max_pfn)); + + if (!read_exact(io_fd, &op.u.domain_setup, sizeof(op.u.domain_setup))) { + ERR("read: domain setup"); + goto out; + } + + /* Build firmware (will be overwritten). 
*/ + op.u.domain_setup.domain = (domid_t)dom; + op.u.domain_setup.flags &= ~XEN_DOMAINSETUP_query; + op.u.domain_setup.bp = ((nr_pfns - 3) << PAGE_SHIFT) + + sizeof (start_info_t); + op.u.domain_setup.maxmem = (nr_pfns - 3) << PAGE_SHIFT; + + op.cmd = DOM0_DOMAIN_SETUP; + if ( xc_dom0_op(xc_handle, &op) ) + goto out; + + /* Get pages. */ + if ( (page_array = malloc(max_pfn * sizeof(unsigned long))) == NULL ) + { + ERR("Could not allocate memory"); + goto out; + } + + if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, + 0, max_pfn) != max_pfn ) + { + ERR("Could not get the page frame list"); + goto out; + } + + DPRINTF("Reloading memory pages: 0%%\n"); + + while (1) { + if (!read_exact(io_fd, &mfn, sizeof(unsigned long))) { + ERR("Error when reading batch size"); + goto out; + } + if (mfn == INVALID_MFN) + break; + + pfn = page_array[mfn]; + + DPRINTF ("xc_linux_restore: page %lu/%lu at %lx\n", mfn, max_pfn, pfn); + + if (read_page(xc_handle, io_fd, dom, page_array[mfn]) < 0) + goto out; + } + + DPRINTF("Received all pages\n"); + + /* Get the list of PFNs that are not in the psuedo-phys map */ + { + unsigned int count; + unsigned long *pfntab; + int rc; + + if (!read_exact(io_fd, &count, sizeof(count))) { + ERR("Error when reading pfn count"); + goto out; + } + + if(!(pfntab = malloc(sizeof(unsigned long) * count))) { + ERR("Out of memory"); + goto out; + } + + if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { + ERR("Error when reading pfntab"); + goto out; + } + + DPRINTF ("Try to free %u pages\n", count); + + for (i = 0; i < count; i++) { + + volatile unsigned long pfn; + + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = dom + }; + set_xen_guest_handle(reservation.extent_start, + (unsigned long *)&pfn); + + pfn = pfntab[i]; + if ((rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, + &reservation)) != 1) { + ERR("Could not decrease reservation : %d", rc); + goto out; + } + } + + DPRINTF("Decreased reservation by %d pages\n", count); + } + + + if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERR("Error when reading ctxt"); + goto out; + } + + /* First to initialize. */ + op.cmd = DOM0_SETVCPUCONTEXT; + op.u.setvcpucontext.domain = (domid_t)dom; + op.u.setvcpucontext.vcpu = 0; + set_xen_guest_handle(op.u.setvcpucontext.ctxt, &ctxt); + if (xc_dom0_op(xc_handle, &op) != 0) { + ERR("Couldn't set vcpu context"); + goto out; + } + + /* Second to set registers... */ + ctxt.flags = VGCF_EXTRA_REGS; + op.cmd = DOM0_SETVCPUCONTEXT; + op.u.setvcpucontext.domain = (domid_t)dom; + op.u.setvcpucontext.vcpu = 0; + set_xen_guest_handle(op.u.setvcpucontext.ctxt, &ctxt); + if (xc_dom0_op(xc_handle, &op) != 0) { + ERR("Couldn't set vcpu context"); + goto out; + } + + /* Just a check. */ + if ( xc_vcpu_getcontext(xc_handle, dom, 0 /* XXX */, &ctxt)) { + ERR("Could not get vcpu context"); + goto out; + } + + /* Then get privreg page. */ + if (read_page (xc_handle, io_fd, dom, ctxt.privregs_pfn) < 0) { + ERR("Could not read vcpu privregs"); + goto out; + } + + /* Read shared info. 
*/ + shared_info = xc_map_foreign_range + (xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, + shared_info_frame); + if (shared_info == NULL) { + ERR("cannot map page"); + goto out; + } + if (!read_exact(io_fd, shared_info, PAGE_SIZE)) { + ERR("Error when reading shared_info page"); + goto out; + } + + /* clear any pending events and the selector */ + memset(&(shared_info->evtchn_pending[0]), 0, + sizeof (shared_info->evtchn_pending)); + for ( i = 0; i < MAX_VIRT_CPUS; i++ ) + shared_info->vcpu_info[i].evtchn_pending_sel = 0; + + mfn = page_array[shared_info->arch.start_info_pfn]; + + munmap (shared_info, PAGE_SIZE); + + /* Uncanonicalise the suspend-record frame number and poke resume rec. */ + start_info = xc_map_foreign_range( + xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); + start_info->nr_pages = max_pfn; + start_info->shared_info = shared_info_frame << PAGE_SHIFT; + start_info->flags = 0; + *store_mfn = page_array[start_info->store_mfn]; + start_info->store_evtchn = store_evtchn; + *console_mfn = page_array[start_info->console_mfn]; + start_info->console_evtchn = console_evtchn; + munmap(start_info, PAGE_SIZE); + + /* + * Safety checking of saved context: + * 1. user_regs is fine, as Xen checks that on context switch. + * 2. fpu_ctxt is fine, as it can't hurt Xen. + * 3. trap_ctxt needs the code selectors checked. + * 4. ldt base must be page-aligned, no more than 8192 ents, ... + * 5. gdt already done, and further checking is done by Xen. + * 6. check that kernel_ss is safe. + * 7. pt_base is already done. + * 8. debugregs are checked by Xen. + * 9. callback code selectors need checking. + */ + DPRINTF("Domain ready to be built.\n"); + + rc = 0; + + out: + if ( (rc != 0) && (dom != 0) ) + xc_domain_destroy(xc_handle, dom); + + free (page_array); + + DPRINTF("Restore exit with rc=%d\n", rc); + + return rc; +} diff -r 7423c81cb3e3 -r b0e9c8cf664a tools/libxc/ia64/xc_ia64_linux_save.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Tue Jul 11 09:44:03 2006 +0200 @@ -0,0 +1,294 @@ +/****************************************************************************** + * xc_ia64_linux_save.c + * + * Save the state of a running Linux session. + * + * Copyright (c) 2003, K A Fraser. + * Rewritten for ia64 by Tristan Gingold + */ + +#include +#include +#include +#include +#include + +#include "xg_private.h" + +/* total number of pages used by the current guest */ +static unsigned long max_pfn; + +static inline ssize_t write_exact(int fd, void *buf, size_t count) +{ + if(write(fd, buf, count) != count) + return 0; + return 1; +} + +static int suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd, + int dom, xc_dominfo_t *info) +{ + int i = 0; + + if (!(*suspend)(dom)) { + ERR("Suspend request failed"); + return -1; + } + + retry: + + if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) { + ERR("Could not get domain info"); + return -1; + } + + + if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend) + return 0; // success + + if (info->paused) { + // try unpausing domain, wait, and retest + xc_domain_unpause( xc_handle, dom ); + + ERR("Domain was paused. 
Wait and re-test."); + usleep(10000); // 10ms + + goto retry; + } + + + if( ++i < 100 ) { + ERR("Retry suspend domain."); + usleep(10000); // 10ms + goto retry; + } + + ERR("Unable to suspend domain."); + + return -1; +} + +int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters, + uint32_t max_factor, uint32_t flags, int (*suspend)(int)) +{ + DECLARE_DOM0_OP; + xc_dominfo_t info; + + int rc = 1; + unsigned long N; + + //int live = (flags & XCFLAGS_LIVE); + int debug = (flags & XCFLAGS_DEBUG); + + /* The new domain's shared-info frame number. */ + unsigned long shared_info_frame; + + /* A copy of the CPU context of the guest. */ + vcpu_guest_context_t ctxt; + + unsigned long *page_array = NULL; + + /* Live mapping of shared info structure */ + shared_info_t *live_shinfo = NULL; + + char *mem; + + if (debug) + fprintf (stderr, "xc_linux_save (ia64): started dom=%d\n", dom); + + if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) { + ERR("Could not get domain info"); + return 1; + } + + shared_info_frame = info.shared_info_frame; + +#if 0 + /* cheesy sanity check */ + if ((info.max_memkb >> (PAGE_SHIFT - 10)) > max_mfn) { + ERR("Invalid state record -- pfn count out of range: %lu", + (info.max_memkb >> (PAGE_SHIFT - 10))); + goto out; + } +#endif + + /* Map the shared info frame */ + if(!(live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ, shared_info_frame))) { + ERR("Couldn't map live_shinfo"); + goto out; + } + + max_pfn = info.max_memkb >> (PAGE_SHIFT - 10); + + + /* This is a non-live suspend. Issue the call back to get the + domain suspended */ + + if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) { + ERR("Domain appears not to have suspended"); + goto out; + } + + if ( (page_array = malloc(max_pfn * sizeof(unsigned long))) == NULL ) + { + ERR("Could not allocate memory"); + goto out; + } + + if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, + 0, max_pfn) != max_pfn ) + { + ERR("Could not get the page frame list"); + goto out; + } + + /* This is expected by xm restore. */ + if(!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) { + ERR("write: max_pfn"); + goto out; + } + + /* xc_linux_restore starts to read here. */ + /* Write a version number. This can avoid searching for a stupid bug + if the format change. + The version is hard-coded, don't forget to change the restore code + too! */ + N = 1; + if (!write_exact(io_fd, &N, sizeof(unsigned long))) { + ERR("write: version"); + goto out; + } + + op.cmd = DOM0_DOMAIN_SETUP; + op.u.domain_setup.domain = (domid_t)dom; + op.u.domain_setup.flags = XEN_DOMAINSETUP_query; + if (xc_dom0_op(xc_handle, &op) < 0) { + ERR("Could not get domain setup"); + goto out; + } + op.u.domain_setup.domain = 0; + if (!write_exact(io_fd, &op.u.domain_setup, sizeof(op.u.domain_setup))) { + ERR("write: domain setup"); + goto out; + } + + /* Start writing out the saved-domain record. 
*/ + for (N = 0; N < max_pfn; N++) { + if (page_array[N] == INVALID_MFN) + continue; + if (debug) + fprintf (stderr, "xc_linux_save: page %lx (%lu/%lu)\n", + page_array[N], N, max_pfn); + + if(!write_exact(io_fd, &N, sizeof(N))) { + ERR("write: max_pfn"); + goto out; + } + + mem = xc_map_foreign_range + (xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, page_array[N]); + if (mem == NULL) { + ERR("cannot map page"); + goto out; + } + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { + ERR("Error when writing to state file (5)"); + goto out; + } + munmap(mem, PAGE_SIZE); + } + + fprintf (stderr, "All memory is saved\n"); + + /* terminate */ + N = INVALID_MFN; + if (!write_exact(io_fd, &N, sizeof(N))) { + ERR("Error when writing to state file (6)"); + goto out; + } + + /* Send through a list of all the PFNs that were not in map at the close */ + { + unsigned int i,j; + unsigned long pfntab[1024]; + + for (i = 0, j = 0; i < max_pfn; i++) { + if (page_array[i] == INVALID_MFN) + j++; + } + + if(!write_exact(io_fd, &j, sizeof(unsigned int))) { + ERR("Error when writing to state file (6a)"); + goto out; + } + + for (i = 0, j = 0; i < max_pfn; ) { + + if (page_array[i] == INVALID_MFN) + pfntab[j++] = i; + + i++; + if (j == 1024 || i == max_pfn) { + if(!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { + ERR("Error when writing to state file (6b)"); + goto out; + } + j = 0; + } + } + + } + + + if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { + ERR("Could not get vcpu context"); + goto out; + } + + if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) { + ERR("Error when writing to state file (1)"); + goto out; + } + + mem = xc_map_foreign_range + (xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, ctxt.privregs_pfn); + if (mem == NULL) { + ERR("cannot map privreg page"); + goto out; + } + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { + ERR("Error when writing privreg to state file (5)"); + goto out; + } + munmap(mem, PAGE_SIZE); + + if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) { + ERR("Error when writing to state file (1)"); + goto out; + } + + /* Success! */ + rc = 0; + + out: + + free (page_array); + + if (live_shinfo) + munmap(live_shinfo, PAGE_SIZE); + + fprintf(stderr,"Save exit rc=%d\n",rc); + + return !!rc; +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */
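
For context, a minimal and purely hypothetical driver of the new ia64 save entry point might look like the sketch below. It is not part of this patch: it assumes the libxc interface of this era (xc_interface_open()/xc_interface_close() working on an int handle, xc_linux_save() and the XCFLAGS_* constants declared in xenguest.h as shown above), and request_suspend() is a placeholder for the toolstack's real suspend request (xend asks the guest to suspend via the xenstore control/shutdown node).

/*
 * Hypothetical caller of the ia64 xc_linux_save() added by this patch.
 * Assumptions (not in the patch): era-appropriate libxc headers and the
 * placeholder request_suspend() callback described above.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <xenctrl.h>
#include <xenguest.h>

/* Placeholder suspend callback: a real toolstack must ask the guest to
   suspend and return non-zero once the request has been issued. */
static int request_suspend(int domid)
{
    fprintf(stderr, "suspend requested for domain %d\n", domid);
    return 1;
}

int main(int argc, char **argv)
{
    int xc_handle, io_fd, rc;
    uint32_t dom;

    if (argc != 3) {
        fprintf(stderr, "usage: %s <domid> <savefile>\n", argv[0]);
        return 1;
    }
    dom = (uint32_t)atoi(argv[1]);

    xc_handle = xc_interface_open();
    if (xc_handle < 0) {
        perror("xc_interface_open");
        return 1;
    }

    io_fd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (io_fd < 0) {
        perror("open");
        xc_interface_close(xc_handle);
        return 1;
    }

    /* The ia64 path is non-live, so max_iters/max_factor are unused. */
    rc = xc_linux_save(xc_handle, io_fd, dom, 0, 0, XCFLAGS_DEBUG,
                       request_suspend);

    close(io_fd);
    xc_interface_close(xc_handle);
    return rc ? 1 : 0;
}

The stream written by xc_ia64_linux_save.c, and consumed in the same order by xc_ia64_linux_restore.c, is: max_pfn (read by the caller, not by restore), the format version (1), the queried domain_setup record, a sequence of (pfn index, page contents) pairs terminated by INVALID_MFN, the count and table of absent pfns, the vcpu context, the privregs page, and finally the shared_info page.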