WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH] stack overflow during pv-guest restore

To: xen-devel@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-devel] [PATCH] stack overflow during pv-guest restore
From: Don Dutile <ddutile@xxxxxxxxxx>
Date: Thu, 31 Jan 2008 15:05:41 -0500
Delivery-date: Thu, 31 Jan 2008 12:06:42 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Reply-to: ddutile@xxxxxxxxxx
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 1.5.0.12 (X11/20070718)

When secondary cpus are initialized during an i386 pv-guest restore
(due to save/restore or live migration), and the guest has
a load that generates a fair number of interrupts (e.g., parallel kernel make),
a stack overflow can occur because cpu_initialize_context() has
a 2800 byte structure it declares on its stack.  linux-i386 has 4K stacks, by 
default.
Using 2800 bytes out of 4K by a single function in a call list isn't nice;
add the beginning of interrupt handling at just the right point, before the
switch to the interrupt stack is made... and... the scale tips...

Simple fix: malloc & free structure as needed.

Would fail save/restore testing of an i386-guest running a parallel, kernel 
make after
50->100 save/restores;  with the fix, haven't seen it fail after 650 
save/restores.

Note: this is a basic port of this function in Jeremy Fitzharinge's Xen 
implementation of pv-ops
      in upstream Linux, part of 15/32 patch, Xen SMP guest support.

Original patch done on rhel5; did a simple diff & merge to xen-unstable's 
version
of smpboot.c to generate the attached patch, so it cleanly applies; but
haven't built/run/tested the xen-unstable version.

Signed-off-by: Donald Dutile <ddutile@xxxxxxxxxx>


--- linux-2.6.18-xen.hg/drivers/xen/core/smpboot.c.orig 2008-01-31 
13:45:10.000000000 -0500
+++ linux-2.6.18-xen.hg/drivers/xen/core/smpboot.c      2008-01-31 
13:56:07.000000000 -0500
@@ -180,9 +180,9 @@
        cpu_idle();
 }
 
-static void __cpuinit cpu_initialize_context(unsigned int cpu)
+static int cpu_initialize_context(unsigned int cpu)
 {
-       vcpu_guest_context_t ctxt;
+       vcpu_guest_context_t *ctxt;
        struct task_struct *idle = idle_task(cpu);
 #ifdef __x86_64__
        struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
@@ -191,58 +191,65 @@
 #endif
 
        if (cpu_test_and_set(cpu, cpu_initialized_map))
-               return;
+               return 0;
 
-       memset(&ctxt, 0, sizeof(ctxt));
+       ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
+       if (ctxt == NULL)
+               return -ENOMEM;
 
-       ctxt.flags = VGCF_IN_KERNEL;
-       ctxt.user_regs.ds = __USER_DS;
-       ctxt.user_regs.es = __USER_DS;
-       ctxt.user_regs.fs = 0;
-       ctxt.user_regs.gs = 0;
-       ctxt.user_regs.ss = __KERNEL_DS;
-       ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
-       ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
+       memset(ctxt, 0, sizeof(*ctxt));
 
-       memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+       ctxt->flags = VGCF_IN_KERNEL;
+       ctxt->user_regs.ds = __USER_DS;
+       ctxt->user_regs.es = __USER_DS;
+       ctxt->user_regs.fs = 0;
+       ctxt->user_regs.gs = 0;
+       ctxt->user_regs.ss = __KERNEL_DS;
+       ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+       ctxt->user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
 
-       smp_trap_init(ctxt.trap_ctxt);
+       memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
-       ctxt.ldt_ents = 0;
+       smp_trap_init(ctxt->trap_ctxt);
 
-       ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
-       ctxt.gdt_ents      = gdt_descr->size / 8;
+       ctxt->ldt_ents = 0;
+
+       ctxt->gdt_frames[0] = virt_to_mfn(gdt_descr->address);
+       ctxt->gdt_ents      = gdt_descr->size / 8;
 
 #ifdef __i386__
-       ctxt.user_regs.cs = __KERNEL_CS;
-       ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
+       ctxt->user_regs.cs = __KERNEL_CS;
+       ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
 
-       ctxt.kernel_ss = __KERNEL_DS;
-       ctxt.kernel_sp = idle->thread.esp0;
+       ctxt->kernel_ss = __KERNEL_DS;
+       ctxt->kernel_sp = idle->thread.esp0;
 
-       ctxt.event_callback_cs     = __KERNEL_CS;
-       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
-       ctxt.failsafe_callback_cs  = __KERNEL_CS;
-       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+       ctxt->event_callback_cs     = __KERNEL_CS;
+       ctxt->event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt->failsafe_callback_cs  = __KERNEL_CS;
+       ctxt->failsafe_callback_eip = (unsigned long)failsafe_callback;
 
-       ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+       ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
 #else /* __x86_64__ */
-       ctxt.user_regs.cs = __KERNEL_CS;
-       ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
+       ctxt->user_regs.cs = __KERNEL_CS;
+       ctxt->user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
 
-       ctxt.kernel_ss = __KERNEL_DS;
-       ctxt.kernel_sp = idle->thread.rsp0;
+       ctxt->kernel_ss = __KERNEL_DS;
+       ctxt->kernel_sp = idle->thread.rsp0;
 
-       ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
-       ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
-       ctxt.syscall_callback_eip  = (unsigned long)system_call;
+       ctxt->event_callback_eip    = (unsigned long)hypervisor_callback;
+       ctxt->failsafe_callback_eip = (unsigned long)failsafe_callback;
+       ctxt->syscall_callback_eip  = (unsigned long)system_call;
 
-       ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
+       ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
 
-       ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+       ctxt->gs_base_kernel = (unsigned long)(cpu_pda(cpu));
 #endif
 
-       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt));
+       BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt));
+
+       kfree(ctxt);
+       return 0;
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
@@ -400,7 +407,9 @@
        if (rc)
                return rc;
 
-       cpu_initialize_context(cpu);
+       rc = cpu_initialize_context(cpu);
+       if (rc)
+               return rc;
 
        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-devel] [PATCH] stack overflow during pv-guest restore, Don Dutile <=