Hi,
during my tests of cpupools I found an issue in console output.
Sometimes the hypervisor hangs due to a deadlock when something is printed
to the console via printk while a per-cpu scheduler lock is held by the printing
processor. Inside printk an event is sent to dom0, which in some cases leads to
a call of vcpu_wake, resulting in the deadlock.
This problem also occurs when BUG is called while the lock is held.
This issue is easily reproducible on a system with multiple CPUs under low
load by calling
xm debug-keys r
to dump the scheduler's run queues. On my 4-core machine I need only about 5
calls to hang the machine.
The attached patch solves the problem by avoiding sending the event in
critical paths.
Juergen
--
Juergen Gross Principal Developer
IP SW OS6 Telephone: +49 (0) 89 636 47950
Fujitsu Siemens Computers e-mail: juergen.gross@xxxxxxxxxxxxxxxxxxx
Otto-Hahn-Ring 6 Internet: www.fujitsu-siemens.com
D-81739 Muenchen Company details: www.fujitsu-siemens.com/imprint.html
Signed-off-by: juergen.gross@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxxxxxxx
# Date 1236328387 -3600
# Node ID 0a7f637315e43205425da88aff3899c8e1ff6d11
# Parent 6315b66fbd5b25597ad2aa766aeda68d6852205d
avoid deadlocks in console output
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/arch/x86/traps.c Fri Mar 06 09:33:07 2009 +0100
@@ -389,6 +389,7 @@
{
watchdog_disable();
console_start_sync();
+ console_enter_critical();
show_execution_state(regs);
@@ -398,6 +399,7 @@
printk("Faulting linear address: %p\n", _p(cr2));
show_page_walk(cr2);
}
+ console_exit_critical();
}
panic("FATAL TRAP: vector = %d (%s)\n"
@@ -545,7 +547,9 @@
DEBUGGER_trap_fatal(trapnr, regs);
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
panic("FATAL TRAP: vector = %d (%s)\n"
"[error_code=%04x]\n",
trapnr, trapstr(trapnr), regs->error_code);
@@ -866,7 +870,9 @@
if ( id == BUGFRAME_dump )
{
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
regs->eip = (unsigned long)eip;
return;
}
@@ -883,17 +889,21 @@
if ( id == BUGFRAME_warn )
{
+ console_enter_critical();
printk("Xen WARN at %.50s:%d\n", filename, lineno);
show_execution_state(regs);
+ console_exit_critical();
regs->eip = (unsigned long)eip;
return;
}
if ( id == BUGFRAME_bug )
{
+ console_enter_critical();
printk("Xen BUG at %.50s:%d\n", filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
+ console_exit_critical();
panic("Xen BUG at %.50s:%d\n", filename, lineno);
}
@@ -906,10 +916,12 @@
eip += sizeof(bug_str);
predicate = is_kernel(bug_str.str) ? (char *)bug_str.str : "<unknown>";
+ console_enter_critical();
printk("Assertion '%s' failed at %.50s:%d\n",
predicate, filename, lineno);
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
show_execution_state(regs);
+ console_exit_critical();
panic("Assertion '%s' failed at %.50s:%d\n",
predicate, filename, lineno);
@@ -920,7 +932,9 @@
return;
}
DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
}
@@ -945,10 +959,12 @@
static void reserved_bit_page_fault(
unsigned long addr, struct cpu_user_regs *regs)
{
+ console_enter_critical();
printk("d%d:v%d: reserved bit in page table (ec=%04X)\n",
current->domain->domain_id, current->vcpu_id, regs->error_code);
show_page_walk(addr);
show_execution_state(regs);
+ console_exit_critical();
}
void propagate_page_fault(unsigned long addr, u16 error_code)
@@ -1247,8 +1263,10 @@
DEBUGGER_trap_fatal(TRAP_page_fault, regs);
+ console_enter_critical();
show_execution_state(regs);
show_page_walk(addr);
+ console_exit_critical();
panic("FATAL PAGE FAULT\n"
"[error_code=%04x]\n"
"Faulting linear address: %p\n",
@@ -2757,7 +2775,9 @@
DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
hardware_gp:
+ console_enter_critical();
show_execution_state(regs);
+ console_exit_critical();
panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
}
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/common/schedule.c
--- a/xen/common/schedule.c Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/common/schedule.c Fri Mar 06 09:33:07 2009 +0100
@@ -930,10 +930,12 @@
for_each_online_cpu ( i )
{
+ console_enter_critical();
spin_lock(&per_cpu(schedule_data, i).schedule_lock);
printk("CPU[%02d] ", i);
SCHED_OP(dump_cpu_state, i);
spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
+ console_exit_critical();
}
local_irq_restore(flags);
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/drivers/char/console.c
--- a/xen/drivers/char/console.c Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/drivers/char/console.c Fri Mar 06 09:33:07 2009 +0100
@@ -414,6 +414,22 @@
* *****************************************************
*/
+/* don't try to wake up dom0 if schedule lock might be held, as this could
+ result in a deadlock! */
+
+static atomic_t console_crit_cnt = ATOMIC_INIT(0);
+
+void console_enter_critical(void)
+{
+ atomic_inc(&console_crit_cnt);
+}
+
+void console_exit_critical(void)
+{
+ BUG_ON(atomic_read(&console_crit_cnt) == 0);
+ atomic_dec(&console_crit_cnt);
+}
+
static void __putstr(const char *str)
{
int c;
@@ -426,7 +442,8 @@
while ( (c = *str++) != '\0' )
putchar_console_ring(c);
- send_guest_global_virq(dom0, VIRQ_CON_RING);
+ if (atomic_read(&console_crit_cnt) == 0)
+ send_guest_global_virq(dom0, VIRQ_CON_RING);
}
static int printk_prefix_check(char *p, char **pp)
@@ -915,6 +932,7 @@
static DEFINE_SPINLOCK(lock);
static char buf[128];
+ console_enter_critical();
debugtrace_dump();
/* Protects buf[] and ensure multi-line message prints atomically. */
@@ -935,6 +953,7 @@
printk("Reboot in five seconds...\n");
spin_unlock_irqrestore(&lock, flags);
+ console_exit_critical();
debugger_trap_immediate();
@@ -953,17 +972,21 @@
void __bug(char *file, int line)
{
+ console_enter_critical();
console_start_sync();
printk("Xen BUG at %s:%d\n", file, line);
dump_execution_state();
+ console_exit_critical();
panic("Xen BUG at %s:%d\n", file, line);
for ( ; ; ) ;
}
void __warn(char *file, int line)
{
+ console_enter_critical();
printk("Xen WARN at %s:%d\n", file, line);
dump_execution_state();
+ console_exit_critical();
}
diff -r 6315b66fbd5b -r 0a7f637315e4 xen/include/xen/lib.h
--- a/xen/include/xen/lib.h Fri Mar 06 08:46:08 2009 +0100
+++ b/xen/include/xen/lib.h Fri Mar 06 09:33:07 2009 +0100
@@ -100,4 +100,8 @@
extern char *print_tainted(char *str);
extern void add_taint(unsigned);
+/* avoid scheduling during console output in critical paths */
+void console_enter_critical(void);
+void console_exit_critical(void);
+
#endif /* __LIB_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|