BTW, this depends on "libxl: allow guest to write "control/shutdown"
xenstore node"
I'm vaguely hopeful that it will cause RHEL6 HVM suspend/migration to
fail gracefully instead of collapsing in a heap.
Ian.
On Tue, 2011-02-08 at 17:24 +0000, Ian Campbell wrote:
> # HG changeset patch
> # User Ian Campbell <ian.campbell@xxxxxxxxxx>
> # Date 1297185819 0
> # Node ID d631a4996cbc69a7fa8489f28d4a3313db12e77a
> # Parent a46b91cd8202726aecd9ddefd8e75faff48144d6
> libxl/xl: improve behaviour when guest fails to suspend itself.
>
> The PV suspend protocol requires guest co-operating whereby the guest
> must respond to a suspend request written to the xenstore control node
> by clearing the node and then making a suspend hypercall.
>
> Currently when a guest fails to do this libxl times out and returns
> a generic failure code to the caller.
>
> In response to this failure xl attempts to resume the guest. However
> if the guest has not responded to the suspend request then the is no
> guarantee that the guest has made the suspend hypercall (in fact it is
> quite unlikely). Since the resume process attempts to modify the
> return value of the hypercall (to indicate a cancelled suspend) this
> results in the guest eax/rax register being corrupted!
>
> To fix this change libxl to do the following:
> * Wait for the guest to acknowledge the suspend request.
> - on timeout cancel the suspend request.
> - if cancellation is successful then return a new error code to
> indicate that the guest is not responding.
> - if the cancel does not succeed then we raced with the guest
> which actually did acknowledge at the last minute, so
> continue.
> * Wait for the guest to suspend.
> - on timeout return the standard error code as before
> * Guest successfully suspended, return success.
>
> Lastly in xl do not attempt to resume a guest if it has not responded
> to the suspend request.
>
> Tested by live migration of PVops kernels which either ignore the
> suspend request, have already crashed and those which suspend/resume
> correctly. In the first two cases the source domain is left alone (and
> continues to function in the first case) and in the third the
> migration is successful.
>
> Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
>
> diff -r a46b91cd8202 -r d631a4996cbc tools/libxl/libxl.h
> --- a/tools/libxl/libxl.h Tue Feb 08 16:26:07 2011 +0000
> +++ b/tools/libxl/libxl.h Tue Feb 08 17:23:39 2011 +0000
> @@ -239,6 +239,7 @@ enum {
> ERROR_NOMEM = -5,
> ERROR_INVAL = -6,
> ERROR_BADFAIL = -7,
> + ERROR_GUEST_TIMEDOUT = -8,
> };
>
> #define LIBXL_VERSION 0
> diff -r a46b91cd8202 -r d631a4996cbc tools/libxl/libxl_dom.c
> --- a/tools/libxl/libxl_dom.c Tue Feb 08 16:26:07 2011 +0000
> +++ b/tools/libxl/libxl_dom.c Tue Feb 08 17:23:39 2011 +0000
> @@ -320,6 +320,7 @@ struct suspendinfo {
> int domid;
> int hvm;
> unsigned int flags;
> + int guest_responded;
> };
>
> static int libxl__domain_suspend_common_switch_qemu_logdirty(int domid,
> unsigned int enable, void *data)
> @@ -347,8 +348,9 @@ static int libxl__domain_suspend_common_
> unsigned long s_state = 0;
> int ret;
> char *path, *state = "suspend";
> - int watchdog = 60;
> + int watchdog;
> libxl_ctx *ctx = libxl__gc_owner(si->gc);
> + xs_transaction_t t;
>
> if (si->hvm)
> xc_get_hvm_param(ctx->xch, si->domid, HVM_PARAM_ACPI_S_STATE,
> &s_state);
> @@ -363,6 +365,7 @@ static int libxl__domain_suspend_common_
> LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "xc_await_suspend failed
> ret=%d", ret);
> return 0;
> }
> + si->guest_responded = 1;
> return 1;
> }
> path = libxl__sprintf(si->gc, "%s/control/shutdown",
> libxl__xs_get_dompath(si->gc, si->domid));
> @@ -376,8 +379,56 @@ static int libxl__domain_suspend_common_
> xc_domain_shutdown(ctx->xch, si->domid, SHUTDOWN_suspend);
> }
> }
> +
> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to acknowledge
> suspend request");
> + watchdog = 60;
> + while (!strcmp(state, "suspend") && watchdog > 0) {
> + usleep(100000);
> +
> + state = libxl__xs_read(si->gc, XBT_NULL, path);
> +
> + watchdog--;
> + }
> +
> + /*
> + * Guest appears to not be responding. Cancel the suspend request.
> + *
> + * We re-read the suspend node and clear it within a transaction
> + * in order to handle the case where we race against the guest
> + * catching up and acknowledging the request at the last minute.
> + */
> + if (!strcmp(state, "suspend")) {
> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn't acknowledge suspend,
> cancelling request");
> + retry_transaction:
> + t = xs_transaction_start(ctx->xsh);
> +
> + state = libxl__xs_read(si->gc, t, path);
> +
> + if (!strcmp(state, "suspend"))
> + libxl__xs_write(si->gc, t, path, "");
> +
> + if (!xs_transaction_end(ctx->xsh, t, 0))
> + if (errno == EAGAIN)
> + goto retry_transaction;
> +
> + }
> +
> + /*
> + * Final check for guest acknowledgement. The guest may have
> + * acknowledged while we were cancelling the request in which case
> + * we lost the race while cancelling and should continue.
> + */
> + if (!strcmp(state, "suspend")) {
> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn't acknowledge suspend,
> request cancelled");
> + return 0;
> + }
> +
> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest acknowledged suspend request");
> + si->guest_responded = 1;
> +
> LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "wait for the guest to suspend");
> - while (!strcmp(state, "suspend") && watchdog > 0) {
> + watchdog = 60;
> + while (watchdog > 0) {
> xc_domaininfo_t info;
>
> usleep(100000);
> @@ -386,17 +437,17 @@ static int libxl__domain_suspend_common_
> int shutdown_reason;
>
> shutdown_reason = (info.flags >> XEN_DOMINF_shutdownshift) &
> XEN_DOMINF_shutdownmask;
> - if (shutdown_reason == SHUTDOWN_suspend)
> + if (shutdown_reason == SHUTDOWN_suspend) {
> + LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "guest has suspended");
> return 1;
> + }
> }
> - state = libxl__xs_read(si->gc, XBT_NULL, path);
> +
> watchdog--;
> }
> - if (!strcmp(state, "suspend")) {
> - LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest didn't suspend in time");
> - libxl__xs_write(si->gc, XBT_NULL, path, "");
> - }
> - return 1;
> +
> + LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "guest did not suspend");
> + return 0;
> }
>
> int libxl__domain_suspend_common(libxl_ctx *ctx, uint32_t domid, int fd,
> @@ -418,6 +469,7 @@ int libxl__domain_suspend_common(libxl_c
> si.hvm = hvm;
> si.gc = &gc;
> si.suspend_eventchn = -1;
> + si.guest_responded = 0;
>
> si.xce = xc_evtchn_open(NULL, 0);
> if (si.xce == NULL)
> @@ -441,8 +493,14 @@ int libxl__domain_suspend_common(libxl_c
>
> rc = xc_domain_save(ctx->xch, fd, domid, 0, 0, flags, &callbacks, hvm);
> if ( rc ) {
> - LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "saving domain");
> - rc = ERROR_FAIL;
> + LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "saving domain: %s",
> + si.guest_responded ?
> + "domain responded to suspend request" :
> + "domain did not respond to suspend request");
> + if ( !si.guest_responded )
> + rc = ERROR_GUEST_TIMEDOUT;
> + else
> + rc = ERROR_FAIL;
> }
>
> if (si.suspend_eventchn > 0)
> diff -r a46b91cd8202 -r d631a4996cbc tools/libxl/xl_cmdimpl.c
> --- a/tools/libxl/xl_cmdimpl.c Tue Feb 08 16:26:07 2011 +0000
> +++ b/tools/libxl/xl_cmdimpl.c Tue Feb 08 17:23:39 2011 +0000
> @@ -2583,7 +2583,10 @@ static void migrate_domain(const char *d
> if (rc) {
> fprintf(stderr, "migration sender: libxl_domain_suspend failed"
> " (rc=%d)\n", rc);
> - goto failed_resume;
> + if (rc == ERROR_GUEST_TIMEDOUT)
> + goto failed_suspend;
> + else
> + goto failed_resume;
> }
>
> //fprintf(stderr, "migration sender: Transfer complete.\n");
> @@ -2661,6 +2664,12 @@ static void migrate_domain(const char *d
> libxl_domain_destroy(&ctx, domid, 1); /* bang! */
> fprintf(stderr, "Migration successful.\n");
> exit(0);
> +
> + failed_suspend:
> + close(send_fd);
> + migration_child_report(child, recv_fd);
> + fprintf(stderr, "Migration failed, failed to suspend at sender.\n");
> + exit(-ERROR_FAIL);
>
> failed_resume:
> close(send_fd);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|