# HG changeset patch
# User smh22@xxxxxxxxxxxxxxxxxxxx
# Node ID fe3a892b33b4ccd3593bde788ceafa0668227450
# Parent 9b345321fd0676436d399c6eca0afd625b886ca4
Many fixes for save/restore and related areas for PAE in particular. Now
should be able to save/restore successfully on machines with up to 16GB
and any size of guest.
Signed-off-by: Steven Hand <steven@xxxxxxxxxxxxx>
diff -r 9b345321fd06 -r fe3a892b33b4
linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/ioremap.c Wed Nov 16 16:45:03 2005
@@ -136,21 +136,19 @@
}
EXPORT_SYMBOL(direct_kernel_remap_pfn_range);
-/* FIXME: This is horribly broken on PAE */
static int lookup_pte_fn(
pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
{
- unsigned long *ptep = (unsigned long *)data;
+ uint64_t *ptep = (uint64_t *)data;
if (ptep)
- *ptep = (pfn_to_mfn(page_to_pfn(pte_page)) <<
- PAGE_SHIFT) |
- ((unsigned long)pte & ~PAGE_MASK);
+ *ptep = ((uint64_t)pfn_to_mfn(page_to_pfn(pte_page)) <<
+ PAGE_SHIFT) | ((unsigned long)pte & ~PAGE_MASK);
return 0;
}
int create_lookup_pte_addr(struct mm_struct *mm,
unsigned long address,
- unsigned long *ptep)
+ uint64_t *ptep)
{
return generic_page_range(mm, address, PAGE_SIZE, lookup_pte_fn, ptep);
}
diff -r 9b345321fd06 -r fe3a892b33b4
linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c
--- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Wed Nov 16 16:45:03 2005
@@ -412,7 +412,7 @@
struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
unsigned int i, op = 0;
struct grant_handle_pair *handle;
- unsigned long ptep;
+ uint64_t ptep;
int ret;
for ( i = 0; i < nr_pages; i++)
@@ -427,9 +427,9 @@
op++;
if (create_lookup_pte_addr(
- blktap_vma->vm_mm,
- MMAP_VADDR(user_vstart, idx, i),
- &ptep) !=0) {
+ blktap_vma->vm_mm,
+ MMAP_VADDR(user_vstart, idx, i),
+ &ptep) !=0) {
DPRINTK("Couldn't get a pte addr!\n");
return;
}
@@ -705,7 +705,7 @@
unsigned long uvaddr;
unsigned long kvaddr;
- unsigned long ptep;
+ uint64_t ptep;
uvaddr = MMAP_VADDR(user_vstart, pending_idx, i);
kvaddr = MMAP_VADDR(mmap_vstart, pending_idx, i);
diff -r 9b345321fd06 -r fe3a892b33b4
linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c
--- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Wed Nov 16 16:45:03 2005
@@ -152,7 +152,8 @@
privcmd_mmapbatch_t m;
struct vm_area_struct *vma = NULL;
unsigned long *p, addr;
- unsigned long mfn, ptep;
+ unsigned long mfn;
+ uint64_t ptep;
int i;
if (copy_from_user(&m, (void *)data, sizeof(m))) {
@@ -217,15 +218,39 @@
#endif
#ifndef __ia64__
- case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: {
- unsigned long m2pv = (unsigned long)machine_to_phys_mapping;
- pgd_t *pgd = pgd_offset_k(m2pv);
- pud_t *pud = pud_offset(pgd, m2pv);
- pmd_t *pmd = pmd_offset(pud, m2pv);
- unsigned long m2p_start_mfn =
- (*(unsigned long *)pmd) >> PAGE_SHIFT;
- ret = put_user(m2p_start_mfn, (unsigned long *)data) ?
- -EFAULT: 0;
+ case IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS: {
+
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ unsigned long m2pv, m2p_mfn;
+ privcmd_m2pmfns_t m;
+ unsigned long *p;
+ int i;
+
+ if (copy_from_user(&m, (void *)data, sizeof(m)))
+ return -EFAULT;
+
+ m2pv = (unsigned long)machine_to_phys_mapping;
+
+ p = m.arr;
+
+ for(i=0; i < m.num; i++) {
+
+ pgd = pgd_offset_k(m2pv);
+ pud = pud_offset(pgd, m2pv);
+ pmd = pmd_offset(pud, m2pv);
+ m2p_mfn = (*(uint64_t *)pmd >> PAGE_SHIFT)&0xFFFFFFFF;
+
+ if (put_user(m2p_mfn, p + i))
+ return -EFAULT;
+
+ m2pv += (1 << 21);
+ }
+
+ ret = 0;
+ break;
+
}
break;
#endif
diff -r 9b345321fd06 -r fe3a892b33b4
linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h Wed Nov 16 16:45:03 2005
@@ -450,11 +450,11 @@
#endif /* !CONFIG_DISCONTIGMEM */
int direct_remap_pfn_range(struct vm_area_struct *vma,
- unsigned long address,
- unsigned long mfn,
- unsigned long size,
- pgprot_t prot,
- domid_t domid);
+ unsigned long address,
+ unsigned long mfn,
+ unsigned long size,
+ pgprot_t prot,
+ domid_t domid);
int direct_kernel_remap_pfn_range(unsigned long address,
unsigned long mfn,
unsigned long size,
@@ -462,7 +462,7 @@
domid_t domid);
int create_lookup_pte_addr(struct mm_struct *mm,
unsigned long address,
- unsigned long *ptep);
+ uint64_t *ptep);
int touch_pte_range(struct mm_struct *mm,
unsigned long address,
unsigned long size);
diff -r 9b345321fd06 -r fe3a892b33b4
linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h Wed Nov 16 16:45:03 2005
@@ -541,7 +541,7 @@
int create_lookup_pte_addr(struct mm_struct *mm,
unsigned long address,
- unsigned long *ptep);
+ uint64_t *ptep);
int touch_pte_range(struct mm_struct *mm,
unsigned long address,
diff -r 9b345321fd06 -r fe3a892b33b4
linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h
--- a/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Wed Nov 16 14:50:36 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/linux-public/privcmd.h Wed Nov 16 16:45:03 2005
@@ -55,6 +55,11 @@
unsigned long *arr; /* array of mfns - top nibble set on err */
} privcmd_mmapbatch_t;
+typedef struct privcmd_m2pmfns {
+ int num; /* max number of mfns to return */
+ unsigned long *arr; /* array of mfns */
+} privcmd_m2pmfns_t;
+
typedef struct privcmd_blkmsg
{
unsigned long op;
@@ -69,12 +74,11 @@
*/
#define IOCTL_PRIVCMD_HYPERCALL \
_IOC(_IOC_NONE, 'P', 0, sizeof(privcmd_hypercall_t))
-
#define IOCTL_PRIVCMD_MMAP \
_IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t))
#define IOCTL_PRIVCMD_MMAPBATCH \
_IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t))
-#define IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN \
+#define IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS \
_IOC(_IOC_READ, 'P', 4, sizeof(unsigned long))
#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_linux_restore.c
--- a/tools/libxc/xc_linux_restore.c Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_linux_restore.c Wed Nov 16 16:45:03 2005
@@ -13,13 +13,13 @@
#include "xg_save_restore.h"
/* max mfn of the whole machine */
-static uint32_t max_mfn;
+static unsigned long max_mfn;
/* virtual starting address of the hypervisor */
-static uint32_t hvirt_start;
+static unsigned long hvirt_start;
/* #levels of page tables used by the currrent guest */
-static uint32_t pt_levels;
+static unsigned int pt_levels;
/* total number of pages used by the current guest */
static unsigned long max_pfn;
@@ -49,7 +49,6 @@
return (r == count) ? 1 : 0;
}
-
/*
** In the state file (or during transfer), all page-table pages are
@@ -60,23 +59,11 @@
*/
int uncanonicalize_pagetable(unsigned long type, void *page)
{
- int i, pte_last, xen_start, xen_end;
+ int i, pte_last;
unsigned long pfn;
uint64_t pte;
- /*
- ** We need to determine which entries in this page table hold
- ** reserved hypervisor mappings. This depends on the current
- ** page table type as well as the number of paging levels.
- */
- xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
-
- if (pt_levels == 2 && type == L2TAB)
- xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
-
- if (pt_levels == 3 && type == L3TAB)
- xen_start = L3_PAGETABLE_ENTRIES_PAE;
-
+ pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
/* Now iterate through the page table, uncanonicalizing each PTE */
for(i = 0; i < pte_last; i++) {
@@ -85,13 +72,10 @@
pte = ((uint32_t *)page)[i];
else
pte = ((uint64_t *)page)[i];
-
- if(i >= xen_start && i < xen_end)
- pte = 0;
-
+
if(pte & _PAGE_PRESENT) {
-
- pfn = pte >> PAGE_SHIFT;
+
+ pfn = (pte >> PAGE_SHIFT) & 0xffffffff;
if(pfn >= max_pfn) {
ERR("Frame number in type %lu page table is out of range: "
@@ -101,17 +85,16 @@
}
- if(type == L1TAB)
- pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PAT);
- else
- pte &= (PAGE_SIZE - 1) & ~(_PAGE_GLOBAL | _PAGE_PSE);
-
- pte |= p2m[pfn] << PAGE_SHIFT;
-
+ pte &= 0xffffff0000000fffULL;
+ pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;
+
if(pt_levels == 2)
((uint32_t *)page)[i] = (uint32_t)pte;
else
((uint64_t *)page)[i] = (uint64_t)pte;
+
+
+
}
}
@@ -143,6 +126,9 @@
/* A table of MFNs to map in the current region */
unsigned long *region_mfn = NULL;
+ /* Types of the pfns in the current region */
+ unsigned long region_pfn_type[MAX_BATCH_SIZE];
+
/* A temporary mapping, and a copy, of one frame of guest memory. */
unsigned long *page = NULL;
@@ -233,10 +219,12 @@
if(xc_domain_memory_increase_reservation(
xc_handle, dom, max_pfn, 0, 0, NULL) != 0) {
- ERR("Failed to increase reservation by %lx KB\n", max_pfn);
+ ERR("Failed to increase reservation by %lx KB\n", PFN_TO_KB(max_pfn));
errno = ENOMEM;
goto out;
}
+
+ DPRINTF("Increased domain reservation by %lx KB\n", PFN_TO_KB(max_pfn));
/* Build the pfn-to-mfn table. We choose MFN ordering returned by Xen. */
if (xc_get_pfn_list(xc_handle, dom, p2m, max_pfn) != max_pfn) {
@@ -248,6 +236,7 @@
ERR("Could not initialise for MMU updates");
goto out;
}
+
DPRINTF("Reloading memory pages: 0%%\n");
@@ -261,7 +250,6 @@
while (1) {
int j;
- unsigned long region_pfn_type[MAX_BATCH_SIZE];
this_pc = (n * 100) / max_pfn;
if ( (this_pc - prev_pc) >= 5 )
@@ -322,7 +310,7 @@
if (pagetype == XTAB)
/* a bogus/unmapped page: skip it */
continue;
-
+
if (pfn > max_pfn) {
ERR("pfn out of range");
goto out;
@@ -348,10 +336,20 @@
** A page table page - need to 'uncanonicalize' it, i.e.
** replace all the references to pfns with the corresponding
** mfns for the new domain.
- */
- if(!uncanonicalize_pagetable(pagetype, page))
- goto out;
-
+ **
+ ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
+ ** so we may need to update the p2m after the main loop.
+ ** Hence we defer canonicalization of L1s until then.
+ */
+ if(pt_levels != 3 || pagetype != L1TAB) {
+
+ if(!uncanonicalize_pagetable(pagetype, page)) {
+ ERR("failed uncanonicalize pt!\n");
+ goto out;
+ }
+
+ }
+
} else if(pagetype != NOTAB) {
ERR("Bogus page type %lx page table is out of range: "
@@ -359,7 +357,6 @@
goto out;
}
-
if (verify) {
@@ -386,9 +383,9 @@
}
if (xc_add_mmu_update(xc_handle, mmu,
- (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
- pfn)) {
- ERR("machpys mfn=%ld pfn=%ld", mfn, pfn);
+ (((unsigned long long)mfn) << PAGE_SHIFT)
+ | MMU_MACHPHYS_UPDATE, pfn)) {
+ ERR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
goto out;
}
} /* end of 'batch' for loop */
@@ -399,14 +396,39 @@
DPRINTF("Received all pages\n");
- if (pt_levels == 3) {
-
- /* Get all PGDs below 4GB. */
+ if(pt_levels == 3) {
+
+ /*
+ ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
+ ** is a little awkward and involves (a) finding all such PGDs and
+ ** replacing them with 'lowmem' versions; (b) upating the p2m[]
+ ** with the new info; and (c) canonicalizing all the L1s using the
+ ** (potentially updated) p2m[].
+ **
+ ** This is relatively slow (and currently involves two passes through
+ ** the pfn_type[] array), but at least seems to be correct. May wish
+ ** to consider more complex approaches to optimize this later.
+ */
+
+ int j, k;
+
+ /* First pass: find all L3TABs currently in > 4G mfns and get new mfns */
for (i = 0; i < max_pfn; i++) {
if (((pfn_type[i] & LTABTYPE_MASK)==L3TAB) && (p2m[i]>0xfffffUL)) {
unsigned long new_mfn;
+ uint64_t l3ptes[4];
+ uint64_t *l3tab;
+
+ l3tab = (uint64_t *)
+ xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ, p2m[i]);
+
+ for(j = 0; j < 4; j++)
+ l3ptes[j] = l3tab[j];
+
+ munmap(l3tab, PAGE_SIZE);
if (!(new_mfn=xc_make_page_below_4G(xc_handle, dom, p2m[i]))) {
ERR("Couldn't get a page below 4GB :-(");
@@ -414,15 +436,58 @@
}
p2m[i] = new_mfn;
- if (xc_add_mmu_update(
- xc_handle, mmu,
- (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE, i)) {
+ if (xc_add_mmu_update(xc_handle, mmu,
+ (((unsigned long long)new_mfn)
+ << PAGE_SHIFT) |
+ MMU_MACHPHYS_UPDATE, i)) {
ERR("Couldn't m2p on PAE root pgdir");
goto out;
}
+
+ l3tab = (uint64_t *)
+ xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ | PROT_WRITE, p2m[i]);
+
+ for(j = 0; j < 4; j++)
+ l3tab[j] = l3ptes[j];
+
+ munmap(l3tab, PAGE_SIZE);
+
}
}
-
+
+ /* Second pass: find all L1TABs and uncanonicalize them */
+ j = 0;
+
+ for(i = 0; i < max_pfn; i++) {
+
+ if (((pfn_type[i] & LTABTYPE_MASK)==L1TAB)) {
+ region_mfn[j] = p2m[i];
+ j++;
+ }
+
+ if(i == (max_pfn-1) || j == MAX_BATCH_SIZE) {
+
+ if (!(region_base = xc_map_foreign_batch(
+ xc_handle, dom, PROT_READ | PROT_WRITE,
+ region_mfn, j))) {
+ ERR("map batch failed");
+ goto out;
+ }
+
+ for(k = 0; k < j; k++) {
+ if(!uncanonicalize_pagetable(L1TAB,
+ region_base + k*PAGE_SIZE)) {
+ ERR("failed uncanonicalize pt!\n");
+ goto out;
+ }
+ }
+
+ munmap(region_base, j*PAGE_SIZE);
+ j = 0;
+ }
+ }
+
}
@@ -430,6 +495,7 @@
ERR("Error doing finish_mmu_updates()");
goto out;
}
+
/*
* Pin page tables. Do this after writing to them as otherwise Xen
@@ -439,7 +505,7 @@
if ( (pfn_type[i] & LPINTAB) == 0 )
continue;
-
+
switch(pfn_type[i]) {
case (L1TAB|LPINTAB):
@@ -463,22 +529,15 @@
}
pin[nr_pins].arg1.mfn = p2m[i];
+
+ nr_pins ++;
- if (++nr_pins == MAX_PIN_BATCH) {
+ if (i == (max_pfn-1) || nr_pins == MAX_PIN_BATCH) {
if (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) {
ERR("Failed to pin batch of %d page tables", nr_pins);
goto out;
}
- DPRINTF("successfully pinned batch of %d page tables", nr_pins);
nr_pins = 0;
- }
- }
-
- if (nr_pins != 0) {
- if((rc = xc_mmuext_op(xc_handle, pin, nr_pins, dom)) < 0) {
- ERR("Failed (2) to pin batch of %d page tables", nr_pins);
- DPRINTF("rc is %d\n", rc);
- goto out;
}
}
@@ -579,23 +638,20 @@
pfn = ctxt.ctrlreg[3] >> PAGE_SHIFT;
if (pfn >= max_pfn) {
- DPRINTF("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx\n",
- pfn, max_pfn, pfn_type[pfn]);
- ERR("PT base is bad.");
+ ERR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
+ pfn, max_pfn, pfn_type[pfn]);
goto out;
}
if ((pt_levels == 2) && ((pfn_type[pfn]&LTABTYPE_MASK) != L2TAB)) {
- DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
- pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB);
- ERR("PT base is bad.");
+ ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+ pfn, max_pfn, pfn_type[pfn], (unsigned long)L2TAB);
goto out;
}
if ((pt_levels == 3) && ((pfn_type[pfn]&LTABTYPE_MASK) != L3TAB)) {
- DPRINTF("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx\n",
- pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB);
- ERR("PT base is bad.");
+ ERR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
+ pfn, max_pfn, pfn_type[pfn], (unsigned long)L3TAB);
goto out;
}
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_linux_save.c
--- a/tools/libxc/xc_linux_save.c Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_linux_save.c Wed Nov 16 16:45:03 2005
@@ -27,13 +27,13 @@
/* max mfn of the whole machine */
-static uint32_t max_mfn;
+static unsigned long max_mfn;
/* virtual starting address of the hypervisor */
-static uint32_t hvirt_start;
+static unsigned long hvirt_start;
/* #levels of page tables used by the currrent guest */
-static uint32_t pt_levels;
+static unsigned int pt_levels;
/* total number of pages used by the current guest */
static unsigned long max_pfn;
@@ -500,6 +500,70 @@
+static unsigned long *xc_map_m2p(int xc_handle,
+ unsigned long max_mfn,
+ int prot)
+{
+ privcmd_m2pmfns_t m2p_mfns;
+ privcmd_mmap_t ioctlx;
+ privcmd_mmap_entry_t *entries;
+ unsigned long m2p_chunks, m2p_size;
+ unsigned long *m2p;
+ int i, rc;
+
+ m2p_size = M2P_SIZE(max_mfn);
+ m2p_chunks = M2P_CHUNKS(max_mfn);
+
+
+ m2p_mfns.num = m2p_chunks;
+
+ if(!(m2p_mfns.arr = malloc(m2p_chunks * sizeof(unsigned long)))) {
+ ERR("failed to allocate space for m2p mfns!\n");
+ return NULL;
+ }
+
+ if (ioctl(xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_MFNS, &m2p_mfns) < 0) {
+ ERR("xc_get_m2p_mfns:");
+ return NULL;
+ }
+
+ if((m2p = mmap(NULL, m2p_size, prot,
+ MAP_SHARED, xc_handle, 0)) == MAP_FAILED) {
+ ERR("failed to mmap m2p");
+ return NULL;
+ }
+
+
+ if(!(entries = malloc(m2p_chunks * sizeof(privcmd_mmap_entry_t)))) {
+ ERR("failed to allocate space for mmap entries!\n");
+ return NULL;
+ }
+
+
+ ioctlx.num = m2p_chunks;
+ ioctlx.dom = DOMID_XEN;
+ ioctlx.entry = entries;
+
+ for(i=0; i < m2p_chunks; i++) {
+
+ entries[i].va = (unsigned long)(((void *)m2p) + (i * M2P_CHUNK_SIZE));
+ entries[i].mfn = m2p_mfns.arr[i];
+ entries[i].npages = M2P_CHUNK_SIZE >> PAGE_SHIFT;
+
+ }
+
+ if((rc = ioctl(xc_handle, IOCTL_PRIVCMD_MMAP, &ioctlx)) < 0) {
+ ERR("ioctl_mmap failed (rc = %d)", rc);
+ return NULL;
+ }
+
+ free(m2p_mfns.arr);
+ free(entries);
+
+ return m2p;
+}
+
+
int xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags)
@@ -531,16 +595,12 @@
/* A copy of the pfn-to-mfn table frame list. */
unsigned long *p2m_frame_list = NULL;
- unsigned long m2p_start_mfn;
-
/* Live mapping of shared info structure */
shared_info_t *live_shinfo = NULL;
/* base of the region in which domain memory is mapped */
unsigned char *region_base = NULL;
-
-
/* power of 2 order of max_pfn */
int order_nr;
@@ -563,9 +623,6 @@
max_factor = DEF_MAX_FACTOR;
initialize_mbit_rate();
-
- DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live ?
- "true" : "false");
if(!get_platform_info(xc_handle, dom,
&max_mfn, &hvirt_start, &pt_levels)) {
@@ -647,11 +704,13 @@
}
/* Setup the mfn_to_pfn table mapping */
- m2p_start_mfn = xc_get_m2p_start_mfn(xc_handle);
- live_m2p = xc_map_foreign_range(xc_handle, DOMID_XEN, M2P_SIZE,
- PROT_READ, m2p_start_mfn);
-
- /* Get a local copy fo the live_P2M_frame_list */
+ if(!(live_m2p = xc_map_m2p(xc_handle, max_mfn, PROT_READ))) {
+ ERR("Failed to map live M2P table");
+ goto out;
+ }
+
+
+ /* Get a local copy of the live_P2M_frame_list */
if(!(p2m_frame_list = malloc(P2M_FL_SIZE))) {
ERR("Couldn't allocate p2m_frame_list array");
goto out;
@@ -662,6 +721,8 @@
for (i = 0; i < max_pfn; i += ulpp) {
if (!translate_mfn_to_pfn(&p2m_frame_list[i/ulpp])) {
ERR("Frame# in pfn-to-mfn frame list is not in pseudophys");
+ ERR("entry %d: p2m_frame_list[%ld] is 0x%lx", i, i/ulpp,
+ p2m_frame_list[i/ulpp]);
goto out;
}
}
@@ -693,20 +754,14 @@
}
-#if 0
- sent_last_iter = 0xFFFFFFFF; /* Pretend we sent a /lot/ last time */
-#else
- sent_last_iter = 1 << 20;
-#endif
+ /* pretend we sent all the pages last iteration */
+ sent_last_iter = max_pfn;
/* calculate the power of 2 order of max_pfn, e.g.
15->4 16->4 17->5 */
for (i = max_pfn-1, order_nr = 0; i ; i >>= 1, order_nr++)
continue;
-
-#undef BITMAP_SIZE
-#define BITMAP_SIZE ((1<<20)/8)
/* Setup to_send / to_fix and to_skip bitmaps */
to_send = malloc(BITMAP_SIZE);
@@ -922,10 +977,8 @@
/* write out pages in batch */
- if (pagetype == XTAB) {
- DPRINTF("SKIP BOGUS page %i mfn %08lx\n", j, pfn_type[j]);
+ if (pagetype == XTAB)
continue;
- }
pagetype &= LTABTYPE_MASK;
@@ -950,10 +1003,10 @@
} /* end of the write out for this batch */
sent_this_iter += batch;
-
+
+ munmap(region_base, batch*PAGE_SIZE);
+
} /* end of this while loop for this iteration */
-
- munmap(region_base, batch*PAGE_SIZE);
skip:
@@ -1027,13 +1080,9 @@
DPRINTF("All memory is saved\n");
- /* Success! */
- rc = 0;
-
- /* ^^^^^^ XXX SMH: hmm.. not sure that's really success! */
-
/* Zero terminate */
- if (!write_exact(io_fd, &rc, sizeof(int))) {
+ i = 0;
+ if (!write_exact(io_fd, &i, sizeof(int))) {
ERR("Error when writing to state file (6)");
goto out;
}
@@ -1043,17 +1092,17 @@
unsigned int i,j;
unsigned long pfntab[1024];
- for ( i = 0, j = 0; i < max_pfn; i++ ) {
- if ( ! is_mapped(live_p2m[i]) )
+ for (i = 0, j = 0; i < max_pfn; i++) {
+ if (!is_mapped(live_p2m[i]))
j++;
}
-
+
if(!write_exact(io_fd, &j, sizeof(unsigned int))) {
ERR("Error when writing to state file (6a)");
goto out;
}
- for ( i = 0, j = 0; i < max_pfn; ) {
+ for (i = 0, j = 0; i < max_pfn; ) {
if (!is_mapped(live_p2m[i]))
pfntab[j++] = i;
@@ -1097,7 +1146,10 @@
ERR("Error when writing to state file (1)");
goto out;
}
-
+
+ /* Success! */
+ rc = 0;
+
out:
if (live_shinfo)
@@ -1110,7 +1162,7 @@
munmap(live_p2m, P2M_SIZE);
if(live_m2p)
- munmap(live_m2p, M2P_SIZE);
+ munmap(live_m2p, M2P_SIZE(max_mfn));
free(pfn_type);
free(pfn_batch);
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xc_private.c Wed Nov 16 16:45:03 2005
@@ -260,18 +260,6 @@
}
-unsigned long xc_get_m2p_start_mfn ( int xc_handle )
-{
- unsigned long mfn;
-
- if ( ioctl( xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN, &mfn ) < 0 )
- {
- perror("xc_get_m2p_start_mfn:");
- return 0;
- }
- return mfn;
-}
-
int xc_get_pfn_list(int xc_handle,
uint32_t domid,
unsigned long *pfn_buf,
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xg_private.h
--- a/tools/libxc/xg_private.h Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xg_private.h Wed Nov 16 16:45:03 2005
@@ -153,8 +153,6 @@
} mfn_mapper_t;
-unsigned long xc_get_m2p_start_mfn (int xc_handle);
-
int xc_copy_to_domain_page(int xc_handle, uint32_t domid,
unsigned long dst_pfn, void *src_page);
diff -r 9b345321fd06 -r fe3a892b33b4 tools/libxc/xg_save_restore.h
--- a/tools/libxc/xg_save_restore.h Wed Nov 16 14:50:36 2005
+++ b/tools/libxc/xg_save_restore.h Wed Nov 16 16:45:03 2005
@@ -3,6 +3,8 @@
**
** Defintions and utilities for save / restore.
*/
+
+#include "xc_private.h"
#define DEBUG 1
#define PROGRESS 0
@@ -55,25 +57,24 @@
** Returns 1 on success, 0 on failure.
*/
static int get_platform_info(int xc_handle, uint32_t dom,
- /* OUT */ uint32_t *max_mfn,
- /* OUT */ uint32_t *hvirt_start,
- /* OUT */ uint32_t *pt_levels)
+ /* OUT */ unsigned long *max_mfn,
+ /* OUT */ unsigned long *hvirt_start,
+ /* OUT */ unsigned int *pt_levels)
{
xen_capabilities_info_t xen_caps = "";
xen_platform_parameters_t xen_params;
- xc_physinfo_t physinfo;
- if (xc_physinfo(xc_handle, &physinfo) != 0)
- return 0;
-
+
if (xc_version(xc_handle, XENVER_platform_parameters, &xen_params) != 0)
return 0;
if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
return 0;
- *max_mfn = physinfo.total_pages;
+ if (xc_memory_op(xc_handle, XENMEM_maximum_ram_page, max_mfn) != 0)
+ return 0;
+
*hvirt_start = xen_params.virt_start;
if (strstr(xen_caps, "xen-3.0-x86_64"))
@@ -95,13 +96,22 @@
** entry tell us whether or not the the PFN is currently mapped.
*/
-#define PFN_TO_KB(_pfn) ((_pfn) * PAGE_SIZE / 1024)
+#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
#define ROUNDUP(_x,_w) (((unsigned long)(_x)+(1UL<<(_w))-1) & ~((1UL<<(_w))-1))
-/* Size in bytes of the M2P and P2M (both rounded up to nearest PAGE_SIZE) */
-#define M2P_SIZE ROUNDUP((max_mfn * sizeof(unsigned long)), PAGE_SHIFT)
-#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT)
+/*
+** The M2P is made up of some number of 'chunks' of at least 2MB in size.
+** The below definitions and utility function(s) deal with mapping the M2P
+** regardless of the underlying machine memory size or architecture.
+*/
+#define M2P_SHIFT L2_PAGETABLE_SHIFT_PAE
+#define M2P_CHUNK_SIZE (1 << M2P_SHIFT)
+#define M2P_SIZE(_m) ROUNDUP(((_m) * sizeof(unsigned long)), M2P_SHIFT)
+#define M2P_CHUNKS(_m) (M2P_SIZE((_m)) >> M2P_SHIFT)
+
+/* Size in bytes of the P2M (rounded up to the nearest PAGE_SIZE bytes) */
+#define P2M_SIZE ROUNDUP((max_pfn * sizeof(unsigned long)), PAGE_SHIFT)
/* Number of unsigned longs in a page */
#define ulpp (PAGE_SIZE/sizeof(unsigned long))
diff -r 9b345321fd06 -r fe3a892b33b4 tools/python/xen/xend/XendCheckpoint.py
--- a/tools/python/xen/xend/XendCheckpoint.py Wed Nov 16 14:50:36 2005
+++ b/tools/python/xen/xend/XendCheckpoint.py Wed Nov 16 16:45:03 2005
@@ -129,7 +129,7 @@
l = read_exact(fd, sizeof_unsigned_long,
"not a valid guest state file: pfn count read")
nr_pfns = unpack("=L", l)[0] # XXX endianess
- if nr_pfns > 1024*1024: # XXX
+ if nr_pfns > 16*1024*1024: # XXX
raise XendError(
"not a valid guest state file: pfn count out of range")
diff -r 9b345321fd06 -r fe3a892b33b4 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Nov 16 14:50:36 2005
+++ b/xen/arch/x86/mm.c Wed Nov 16 16:45:03 2005
@@ -898,6 +898,7 @@
return 1;
fail:
+ MEM_LOG("Failure in alloc_l3_table: entry %d", i);
while ( i-- > 0 )
if ( is_guest_l3_slot(i) )
put_page_from_l3e(pl3e[i], pfn);
@@ -948,6 +949,7 @@
return 1;
fail:
+ MEM_LOG("Failure in alloc_l4_table: entry %d", i);
while ( i-- > 0 )
if ( is_guest_l4_slot(i) )
put_page_from_l4e(pl4e[i], pfn);
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog