Hi,
Qemu-dm can be blocked in the fsync system call for more than 3 seconds
when dom0 is overloaded.
This causes SMP Windows 2008 guests to crash with Bug Check 0x101.
Bug Check 0x101 (CLOCK_WATCHDOG_TIMEOUT) indicates that an expected
clock interrupt on a secondary processor, in a multi-processor system,
was not received within the allocated interval.
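The crash can be easily reproduced with the following modification, which makes raw_flush() sleep for 4 seconds before calling fsync() whenever send_vcpu is non-zero: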
diff -r 76c9cf11ce23 tools/ioemu/block-raw.c
--- a/tools/ioemu/block-raw.c Fri Mar 21 09:45:34 2008 +0000
+++ b/tools/ioemu/block-raw.c Mon Mar 24 16:28:16 2008 +0900
@@ -603,6 +603,11 @@ static void raw_flush(BlockDriverState *
static void raw_flush(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
+#if 1 /* reproduce Windows Bug Check 0x101 */
+ extern int send_vcpu;
+ if (send_vcpu != 0)
+ sleep(4);
+#endif
fsync(s->fd);
}
The attached patch fixes it. However, I think the root cause is
that a timer event cannot interrupt I/O emulation in progress.
How should we fix it?
Thanks,
Kouya
diff -r 76c9cf11ce23 tools/ioemu/block-raw.c
--- a/tools/ioemu/block-raw.c Fri Mar 21 09:45:34 2008 +0000
+++ b/tools/ioemu/block-raw.c Mon Mar 24 17:56:19 2008 +0900
@@ -496,6 +496,10 @@ static void raw_aio_cancel(BlockDriverAI
pacb = &acb->next;
}
}
+
+static void aio_fsync_cb(void *opague, int ret)
+{
+}
#endif
static void raw_close(BlockDriverState *bs)
@@ -602,8 +606,20 @@ static int raw_create(const char *filena
static void raw_flush(BlockDriverState *bs)
{
+#ifdef NO_AIO
BDRVRawState *s = bs->opaque;
fsync(s->fd);
+#else
+ RawAIOCB *acb;
+
+ acb = raw_aio_setup(bs, 0, NULL, 0, aio_fsync_cb, NULL);
+ if (!acb)
+ return;
+ if (aio_fsync(O_SYNC, &acb->aiocb) < 0) {
+ qemu_aio_release(acb);
+ return;
+ }
+#endif
}
BlockDriver bdrv_raw = {
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|