This patch doesn't handle live migration with the balloon driver.
Sorry for the inconvenience. I'll post an updated patch tomorrow.
On Tue, Aug 28, 2007 at 03:57:32PM +0900, Isaku Yamahata wrote:
> # HG changeset patch
> # User yamahata@xxxxxxxxxxxxx
> # Date 1188274506 -32400
> # Node ID b2b904e37c102327393049c795fa5cdc8b0e3c10
> # Parent 82ebc14bec8f14281401faa2eefe4ebc6e4d77bc
> rewrite ia64 domain save/restore with foreign p2m exposure
> PATCHNAME: rewrite_ia64_domain_save_restore_foreign_p2m
>
> Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
>
> diff -r 82ebc14bec8f -r b2b904e37c10 tools/libxc/ia64/xc_ia64_linux_restore.c
> --- a/tools/libxc/ia64/xc_ia64_linux_restore.c Tue Aug 28 13:10:18 2007 +0900
> +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Tue Aug 28 13:15:06 2007 +0900
> @@ -5,12 +5,17 @@
> *
> * Copyright (c) 2003, K A Fraser.
> * Rewritten for ia64 by Tristan Gingold <tristan.gingold@xxxxxxxx>
> + *
> + * Copyright (c) 2007 Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
> + * Use foreign p2m exposure.
> */
>
> #include <stdlib.h>
> #include <unistd.h>
>
> #include "xg_private.h"
> +#include "xc_ia64_save_restore.h"
> +#include "xc_ia64.h"
>
> #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
>
> @@ -40,6 +45,16 @@ read_exact(int fd, void *buf, size_t cou
> }
>
> static int
> +populate_page_if_necessary(int xc_handle, uint32_t dom, unsigned long gmfn,
> + struct xen_ia64_p2m_table *p2m_table)
> +{
> + if (xc_ia64_p2m_present(p2m_table, gmfn))
> + return 0;
> +
> + return xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &gmfn);
> +}
> +
> +static int
> read_page(int xc_handle, int io_fd, uint32_t dom, unsigned long pfn)
> {
> void *mem;
> @@ -78,10 +93,11 @@ xc_domain_restore(int xc_handle, int io_
> /* A copy of the CPU context of the guest. */
> vcpu_guest_context_t ctxt;
>
> - unsigned long *page_array = NULL;
> -
> /* A temporary mapping of the guest's start_info page. */
> start_info_t *start_info;
> +
> + struct xen_ia64_p2m_table p2m_table;
> + xc_ia64_p2m_init(&p2m_table);
>
> if (hvm) {
> ERROR("HVM Restore is unsupported");
> @@ -102,7 +118,7 @@ xc_domain_restore(int xc_handle, int io_
> ERROR("Error when reading version");
> goto out;
> }
> - if (ver != 1) {
> + if (ver != XC_IA64_SR_FORMAT_VER_ONE && ver != XC_IA64_SR_FORMAT_VER_TWO) {
> ERROR("version of save doesn't match");
> goto out;
> }
> @@ -112,25 +128,6 @@ xc_domain_restore(int xc_handle, int io_
> ERROR("Unable to lock_pages ctxt");
> return 1;
> }
> -
> - /* Get pages. */
> - page_array = malloc(p2m_size * sizeof(unsigned long));
> - if (page_array == NULL) {
> - ERROR("Could not allocate memory");
> - goto out;
> - }
> -
> - for ( i = 0; i < p2m_size; i++ )
> - page_array[i] = i;
> -
> - if ( xc_domain_memory_populate_physmap(xc_handle, dom, p2m_size,
> - 0, 0, page_array) )
> - {
> - ERROR("Failed to allocate memory for %ld KB to dom %d.\n",
> - PFN_TO_KB(p2m_size), dom);
> - goto out;
> - }
> - DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(p2m_size));
>
> if (!read_exact(io_fd, &domctl.u.arch_setup,
> sizeof(domctl.u.arch_setup))) {
> ERROR("read: domain setup");
> @@ -155,6 +152,34 @@ xc_domain_restore(int xc_handle, int io_
> }
> shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
>
> + if (ver == XC_IA64_SR_FORMAT_VER_TWO) {
> + unsigned int memmap_info_num_pages;
> + unsigned long memmap_size;
> + xen_ia64_memmap_info_t *memmap_info;
> +
> + if (!read_exact(io_fd, &memmap_info_num_pages,
> + sizeof(memmap_info_num_pages))) {
> + ERROR("read: memmap_info_num_pages");
> + goto out;
> + }
> + memmap_size = memmap_info_num_pages * PAGE_SIZE;
> + memmap_info = malloc(memmap_size);
> + if (memmap_info == NULL) {
> + ERROR("Could not allocate memory for memmap_info");
> + goto out;
> + }
> + if (!read_exact(io_fd, memmap_info, memmap_size)) {
> + ERROR("read: memmap_info");
> + goto out;
> + }
> + if (xc_ia64_p2m_map(&p2m_table, xc_handle,
> + dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) {
> + ERROR("p2m mapping");
> + goto out;
> + }
> + free(memmap_info);
> + }
> +
> DPRINTF("Reloading memory pages: 0%%\n");
>
> while (1) {
> @@ -165,17 +190,25 @@ xc_domain_restore(int xc_handle, int io_
> if (gmfn == INVALID_MFN)
> break;
>
> + if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table) < 0) {
> + ERROR("can not populate page 0x%lx", gmfn);
> + goto out;
> + }
> if (read_page(xc_handle, io_fd, dom, gmfn) < 0)
> goto out;
> }
>
> DPRINTF("Received all pages\n");
>
> - /* Get the list of PFNs that are not in the psuedo-phys map */
> - {
> + /*
> + * Get the list of PFNs that are not in the pseudo-phys map
> + *
> + * Now we allocate pages on demand so that we don't need to free
> + * pages here. Just read and discard.
> + */
> + if (ver == XC_IA64_SR_FORMAT_VER_ONE) {
> unsigned int count;
> unsigned long *pfntab;
> - int rc;
>
> if (!read_exact(io_fd, &count, sizeof(count))) {
> ERROR("Error when reading pfn count");
> @@ -189,34 +222,11 @@ xc_domain_restore(int xc_handle, int io_
> }
>
> if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) {
> + free(pfntab);
> ERROR("Error when reading pfntab");
> goto out;
> }
> -
> - DPRINTF ("Try to free %u pages\n", count);
> -
> - for (i = 0; i < count; i++) {
> -
> - volatile unsigned long pfn;
> -
> - struct xen_memory_reservation reservation = {
> - .nr_extents = 1,
> - .extent_order = 0,
> - .domid = dom
> - };
> - set_xen_guest_handle(reservation.extent_start,
> - (unsigned long *)&pfn);
> -
> - pfn = pfntab[i];
> - rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
> - &reservation);
> - if (rc != 1) {
> - ERROR("Could not decrease reservation : %d", rc);
> - goto out;
> - }
> - }
> -
> - DPRINTF("Decreased reservation by %d pages\n", count);
> + free(pfntab);
> }
>
>
> @@ -274,6 +284,10 @@ xc_domain_restore(int xc_handle, int io_
> munmap (shared_info, PAGE_SIZE);
>
> /* Uncanonicalise the suspend-record frame number and poke resume rec. */
> + if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table)) {
> + ERROR("cannot populate page 0x%lx", gmfn);
> + goto out;
> + }
> start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
> PROT_READ | PROT_WRITE, gmfn);
> if (start_info == NULL) {
> @@ -309,8 +323,7 @@ xc_domain_restore(int xc_handle, int io_
> if ((rc != 0) && (dom != 0))
> xc_domain_destroy(xc_handle, dom);
>
> - if (page_array != NULL)
> - free(page_array);
> + xc_ia64_p2m_unmap(&p2m_table);
>
> unlock_pages(&ctxt, sizeof(ctxt));
>
> diff -r 82ebc14bec8f -r b2b904e37c10 tools/libxc/ia64/xc_ia64_linux_save.c
> --- a/tools/libxc/ia64/xc_ia64_linux_save.c Tue Aug 28 13:10:18 2007 +0900
> +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Tue Aug 28 13:15:06 2007 +0900
> @@ -5,6 +5,9 @@
> *
> * Copyright (c) 2003, K A Fraser.
> * Rewritten for ia64 by Tristan Gingold <tristan.gingold@xxxxxxxx>
> + *
> + * Copyright (c) 2007 Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
> + * Use foreign p2m exposure.
> */
>
> #include <inttypes.h>
> @@ -14,6 +17,9 @@
> #include <sys/time.h>
>
> #include "xg_private.h"
> +#include "xc_ia64.h"
> +#include "xc_ia64_save_restore.h"
> +#include "xc_efi.h"
>
> /*
> ** Default values for important tuning parameters. Can override by passing
> @@ -151,8 +157,6 @@ xc_domain_save(int xc_handle, int io_fd,
> /* A copy of the CPU context of the guest. */
> vcpu_guest_context_t ctxt;
>
> - unsigned long *page_array = NULL;
> -
> /* Live mapping of shared info structure */
> shared_info_t *live_shinfo = NULL;
>
> @@ -180,6 +184,16 @@ xc_domain_save(int xc_handle, int io_fd,
> unsigned long *to_skip = NULL;
>
> char *mem;
> +
> + unsigned int memmap_info_num_pages;
> + unsigned long memmap_size = 0;
> + xen_ia64_memmap_info_t *memmap_info_live = NULL;
> + xen_ia64_memmap_info_t *memmap_info = NULL;
> + void *memmap_desc_start;
> + void *memmap_desc_end;
> + void *p;
> + struct xen_ia64_p2m_table p2m_table;
> + xc_ia64_p2m_init(&p2m_table);
>
> if (debug)
> fprintf(stderr, "xc_linux_save (ia64): started dom=%d\n", dom);
> @@ -217,12 +231,6 @@ xc_domain_save(int xc_handle, int io_fd,
> }
>
> p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom);
> -
> - page_array = malloc(p2m_size * sizeof(unsigned long));
> - if (page_array == NULL) {
> - ERROR("Could not allocate memory");
> - goto out;
> - }
>
> /* This is expected by xm restore. */
> if (!write_exact(io_fd, &p2m_size, sizeof(unsigned long))) {
> @@ -236,7 +244,7 @@ xc_domain_save(int xc_handle, int io_fd,
> The version is hard-coded, don't forget to change the restore code
> too! */
> {
> - unsigned long version = 1;
> + unsigned long version = XC_IA64_SR_FORMAT_VER_CURRENT;
>
> if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
> ERROR("write: version");
> @@ -304,6 +312,38 @@ xc_domain_save(int xc_handle, int io_fd,
>
> }
>
> + memmap_info_num_pages = live_shinfo->arch.memmap_info_num_pages;
> + memmap_size = PAGE_SIZE * memmap_info_num_pages;
> + memmap_info_live = xc_map_foreign_range(xc_handle, info.domid,
> + memmap_size, PROT_READ,
> + live_shinfo->arch.memmap_info_pfn);
> + if (memmap_info_live == NULL) {
> + PERROR("Could not map memmap info.");
> + goto out;
> + }
> + memmap_info = malloc(memmap_size);
> + if (memmap_info == NULL) {
> + PERROR("Could not allocate memmap info memory");
> + goto out;
> + }
> + memcpy(memmap_info, memmap_info_live, memmap_size);
> + munmap(memmap_info_live, memmap_size);
> + memmap_info_live = NULL;
> +
> + if (xc_ia64_p2m_map(&p2m_table, xc_handle, dom, memmap_info, 0) < 0) {
> + PERROR("xc_ia64_p2m_map");
> + goto out;
> + }
> + if (!write_exact(io_fd,
> + &memmap_info_num_pages, sizeof(memmap_info_num_pages))) {
> + PERROR("write: arch.memmap_info_num_pages");
> + goto out;
> + }
> + if (!write_exact(io_fd, memmap_info, memmap_size)) {
> + PERROR("write: memmap_info");
> + goto out;
> + }
> +
> sent_last_iter = p2m_size;
> total_sent = 0;
>
> @@ -313,13 +353,6 @@ xc_domain_save(int xc_handle, int io_fd,
>
> sent_this_iter = 0;
> skip_this_iter = 0;
> -
> - /* Get the pfn list, as it may change. */
> - if (xc_ia64_get_pfn_list(xc_handle, dom, page_array,
> - 0, p2m_size) != p2m_size) {
> - ERROR("Could not get the page frame list");
> - goto out;
> - }
>
> /* Dirtied pages won't be saved.
> slightly wasteful to peek the whole array evey time,
> @@ -334,45 +367,64 @@ xc_domain_save(int xc_handle, int io_fd,
> }
>
> /* Start writing out the saved-domain record. */
> - for (N = 0; N < p2m_size; N++) {
> - if (page_array[N] == INVALID_MFN)
> + memmap_desc_start = &memmap_info->memdesc;
> + memmap_desc_end = memmap_desc_start + memmap_info->efi_memmap_size;
> + for (p = memmap_desc_start;
> + p < memmap_desc_end;
> + p += memmap_info->efi_memdesc_size) {
> + efi_memory_desc_t *md = p;
> + if (md->type != EFI_CONVENTIONAL_MEMORY ||
> + md->attribute != EFI_MEMORY_WB ||
> + md->num_pages == 0)
> continue;
> - if (!last_iter) {
> - if (test_bit(N, to_skip) && test_bit(N, to_send))
> - skip_this_iter++;
> - if (test_bit(N, to_skip) || !test_bit(N, to_send))
> +
> + for (N = md->phys_addr >> PAGE_SHIFT;
> + N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
> + PAGE_SHIFT;
> + N++) {
> +
> + if (!xc_ia64_p2m_allocated(&p2m_table, N))
> continue;
> +
> + if (!last_iter) {
> + if (test_bit(N, to_skip) && test_bit(N, to_send))
> + skip_this_iter++;
> + if (test_bit(N, to_skip) || !test_bit(N, to_send))
> + continue;
> + }
> +
> + if (debug)
> + fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
> + xc_ia64_p2m_mfn(&p2m_table, N),
> + N, p2m_size);
> +
> + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
> + PROT_READ|PROT_WRITE, N);
> + if (mem == NULL) {
> + /* The page may have moved.
> + It will be re-marked dirty.
> + FIXME: to be tracked. */
> + fprintf(stderr, "cannot map mfn page %lx gpfn %lx: %s\n",
> + xc_ia64_p2m_mfn(&p2m_table, N),
> + N, safe_strerror(errno));
> + continue;
> + }
> +
> + if (!write_exact(io_fd, &N, sizeof(N))) {
> + ERROR("write: p2m_size");
> + munmap(mem, PAGE_SIZE);
> + goto out;
> + }
> +
> + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
> + ERROR("Error when writing to state file (5)");
> + munmap(mem, PAGE_SIZE);
> + goto out;
> + }
> + munmap(mem, PAGE_SIZE);
> + sent_this_iter++;
> + total_sent++;
> }
> -
> - if (debug)
> - fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n",
> - page_array[N], N, p2m_size);
> -
> - mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
> - PROT_READ|PROT_WRITE, N);
> - if (mem == NULL) {
> - /* The page may have move.
> - It will be remarked dirty.
> - FIXME: to be tracked. */
> - fprintf(stderr, "cannot map mfn page %lx gpfn %lx: %s\n",
> - page_array[N], N, safe_strerror(errno));
> - continue;
> - }
> -
> - if (!write_exact(io_fd, &N, sizeof(N))) {
> - ERROR("write: p2m_size");
> - munmap(mem, PAGE_SIZE);
> - goto out;
> - }
> -
> - if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
> - ERROR("Error when writing to state file (5)");
> - munmap(mem, PAGE_SIZE);
> - goto out;
> - }
> - munmap(mem, PAGE_SIZE);
> - sent_this_iter++;
> - total_sent++;
> }
>
> if (last_iter)
> @@ -420,38 +472,6 @@ xc_domain_save(int xc_handle, int io_fd,
> }
> }
>
> - /* Send through a list of all the PFNs that were not in map at the close */
> - {
> - unsigned int i,j;
> - unsigned long pfntab[1024];
> -
> - for (i = 0, j = 0; i < p2m_size; i++) {
> - if (page_array[i] == INVALID_MFN)
> - j++;
> - }
> -
> - if (!write_exact(io_fd, &j, sizeof(unsigned int))) {
> - ERROR("Error when writing to state file (6a)");
> - goto out;
> - }
> -
> - for (i = 0, j = 0; i < p2m_size; ) {
> -
> - if (page_array[i] == INVALID_MFN)
> - pfntab[j++] = i;
> -
> - i++;
> - if (j == 1024 || i == p2m_size) {
> - if (!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) {
> - ERROR("Error when writing to state file (6b)");
> - goto out;
> - }
> - j = 0;
> - }
> - }
> -
> - }
> -
> if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
> ERROR("Could not get vcpu context");
> goto out;
> @@ -494,13 +514,17 @@ xc_domain_save(int xc_handle, int io_fd,
> }
> }
>
> - free(page_array);
> unlock_pages(to_send, bitmap_size);
> free(to_send);
> unlock_pages(to_skip, bitmap_size);
> free(to_skip);
> if (live_shinfo)
> munmap(live_shinfo, PAGE_SIZE);
> + if (memmap_info_live)
> + munmap(memmap_info_live, memmap_size);
> + if (memmap_info)
> + free(memmap_info);
> + xc_ia64_p2m_unmap(&p2m_table);
>
> fprintf(stderr,"Save exit rc=%d\n",rc);
>
> diff -r 82ebc14bec8f -r b2b904e37c10 tools/libxc/ia64/xc_ia64_save_restore.h
> --- /dev/null Thu Jan 01 00:00:00 1970 +0000
> +++ b/tools/libxc/ia64/xc_ia64_save_restore.h Tue Aug 28 13:15:06 2007 +0900
> @@ -0,0 +1,44 @@
> +/******************************************************************************
> + * xc_ia64_save_restore.h
> + *
> + * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
> + * VA Linux Systems Japan K.K.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + *
> + */
> +
> +#ifndef XC_IA64_SAVE_RESTORE_H
> +#define XC_IA64_SAVE_RESTORE_H
> +
> + /* introduced changeset 10692:306d7857928c of xen-ia64-unstable.hg */
> +#define XC_IA64_SR_FORMAT_VER_ONE 1UL
> + /* using foreign p2m exposure version */
> +#define XC_IA64_SR_FORMAT_VER_TWO 2UL
> +#define XC_IA64_SR_FORMAT_VER_MAX 2UL
> +
> +#define XC_IA64_SR_FORMAT_VER_CURRENT XC_IA64_SR_FORMAT_VER_TWO
> +
> +#endif /* XC_IA64_SAVE_RESTORE_H */
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-set-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> _______________________________________________
> Xen-ia64-devel mailing list
> Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-ia64-devel
--
yamahata
_______________________________________________
Xen-ia64-devel mailing list
Xen-ia64-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ia64-devel
|