> > We should make xc_linux_save cope more gracefully with
> > arch.pfn_to_mfn_frame_list_list being NULL, have xc_linux_save zero
the
> > field in shared_info before writing it out (its an MFN and not valid
> > anyhow), and then move the field's re-initialisation a little later
in
> > the post_suspend function to close the race.
>
> I've done some testing with the below, and it seems to do the trick.
30 seconds is quite a time to wait... Wouldn't a second be more
appropriate?
While you're at it, I'd be grateful if you could move the shared_info
assignment in linux's post_suspend() and setup_arch() [i386 and x86_64]
below the list building code.
Thanks,
Ian
> regards
> john
>
>
> # HG changeset patch
> # User john.levon@xxxxxxx
> # Date 1169600022 28800
> # Node ID 3121e0cb7be68fa46c59f61422f05adc03ec5a5e
> # Parent a75413c0072fa5b892bd8b6a05c1f1d3435bb093
> Close save-after-restore race. Make xc_linux_save() wait for the
> frame_list_list MFN to be updated by the domain before trying to use
it.
>
> Signed-off-by: John Levon <john.levon@xxxxxxx>
>
> diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c
> --- a/tools/libxc/xc_linux_save.c
> +++ b/tools/libxc/xc_linux_save.c
> @@ -403,6 +403,33 @@ static int suspend_and_state(int (*suspe
> return -1;
> }
>
> +/*
> +** Map the top-level page of MFNs from the guest. The guest might not
have
> +** finished resuming from a previous restore operation, so we wait a
while
> for
> +** it to update the MFN to a reasonable value.
> +*/
> +static void *map_frame_list_list(int xc_handle, uint32_t dom,
> + shared_info_t *shinfo)
> +{
> + int count = 3000;
> + void *p;
> +
> + while (count-- && shinfo->arch.pfn_to_mfn_frame_list_list == 0)
> + usleep(10000);
> +
> + if (shinfo->arch.pfn_to_mfn_frame_list_list == 0) {
> + ERROR("Timed out waiting for frame list updated.");
> + return NULL;
> + }
> +
> + p = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
> +
shinfo->arch.pfn_to_mfn_frame_list_list);
> +
> + if (p == NULL)
> + ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
> +
> + return p;
> +}
>
> /*
> ** During transfer (or in the state file), all page-table pages must
be
> @@ -663,14 +690,11 @@ int xc_linux_save(int xc_handle, int io_
>
> max_pfn = live_shinfo->arch.max_pfn;
>
> - live_p2m_frame_list_list =
> - xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
> - live_shinfo-
> >arch.pfn_to_mfn_frame_list_list);
> -
> - if (!live_p2m_frame_list_list) {
> - ERROR("Couldn't map p2m_frame_list_list (errno %d)", errno);
> - goto out;
> - }
> + live_p2m_frame_list_list = map_frame_list_list(xc_handle, dom,
> + live_shinfo);
> +
> + if (!live_p2m_frame_list_list)
> + goto out;
>
> live_p2m_frame_list =
> xc_map_foreign_batch(xc_handle, dom, PROT_READ,
> @@ -1169,8 +1193,14 @@ int xc_linux_save(int xc_handle, int io_
> ctxt.ctrlreg[3] =
> xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
>
> + /*
> + * Reset the MFN to be a known-invalid value. See
> map_frame_list_list().
> + */
> + memcpy(page, live_shinfo, PAGE_SIZE);
> + ((shared_info_t *)page)->arch.pfn_to_mfn_frame_list_list = 0;
> +
> if (!write_exact(io_fd, &ctxt, sizeof(ctxt)) ||
> - !write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
> + !write_exact(io_fd, page, PAGE_SIZE)) {
> ERROR("Error when writing to state file (1) (errno %d)",
errno);
> goto out;
> }
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|