WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-ppc-devel

[XenPPC] [PATCH] xm save / restore

Xen and Linux patches for non-live ('nonlive') save and restore.
They also include improved code for searching page_array[] while saving the htab (see xc_linux_save.c).

A modified version of htab.h is placed in a new directory: tools/libxc/xen/asm
This probably needs a better solution.

The htab is mapped by 'decorating' the pfn (see xen/arch/powerpc/mm.c). However,
no range or validity checking is done at this time.

...................................................


Xen diffs:

diff -r 7669fca80bfc config/powerpc64.mk
--- a/config/powerpc64.mk       Mon Dec 04 11:46:53 2006 -0500
+++ b/config/powerpc64.mk       Wed Dec 13 15:39:32 2006 -0500
@@ -3,3 +3,4 @@ CONFIG_POWERPC_$(XEN_OS) := y
 
 CFLAGS += -DELFSIZE=64
 LIBDIR := lib
+CONFIG_XCUTILS :=y
diff -r 7669fca80bfc tools/libxc/powerpc64/Makefile
--- a/tools/libxc/powerpc64/Makefile    Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/powerpc64/Makefile    Wed Dec 13 15:39:32 2006 -0500
@@ -2,5 +2,7 @@ GUEST_SRCS-y += powerpc64/xc_linux_build
 GUEST_SRCS-y += powerpc64/xc_linux_build.c
 GUEST_SRCS-y += powerpc64/xc_prose_build.c
 GUEST_SRCS-y += powerpc64/utils.c
+GUEST_SRCS-y += powerpc64/xc_linux_save.c
+GUEST_SRCS-y += powerpc64/xc_linux_restore.c
 
 CTRL_SRCS-y += powerpc64/xc_memory.c
diff -r 7669fca80bfc tools/libxc/xc_private.c
--- a/tools/libxc/xc_private.c  Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/xc_private.c  Wed Dec 13 15:39:32 2006 -0500
@@ -306,6 +306,23 @@ int xc_get_pfn_list(int xc_handle,
 
     return (ret < 0) ? -1 : domctl.u.getmemlist.num_pfns;
 }
+
+/* Fetch the domain's htab: on success set *htab_raddr to its address and
+ * return the PTE count; on failure return -1 and leave *htab_raddr alone. */
+int xc_get_shadow_list(int xc_handle, uint32_t domid, uint64_t *htab_raddr)
+{
+    DECLARE_DOMCTL;
+
+    domctl.cmd = XEN_DOMCTL_getshadowlist;
+    domctl.domain = (domid_t)domid;
+
+    if (do_domctl(xc_handle, &domctl) < 0)
+        return -1;      /* outputs are only meaningful after a successful call */
+
+    *htab_raddr = domctl.u.getshadowlist.htab_map;
+    return domctl.u.getshadowlist.htab_num_ptes;
+}
+
 #endif
 
 long xc_get_tot_pages(int xc_handle, uint32_t domid)
diff -r 7669fca80bfc tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/libxc/xenctrl.h     Wed Dec 13 15:39:32 2006 -0500
@@ -518,6 +518,8 @@ int xc_get_pfn_list(int xc_handle, uint3
 int xc_get_pfn_list(int xc_handle, uint32_t domid, xen_pfn_t *pfn_buf,
                     unsigned long max_pfns);
 
+int xc_get_shadow_list(int xc_handle, uint32_t domid, uint64_t *mfn_htab_map);
+
 unsigned long xc_ia64_fpsr_default(void);
 
 int xc_ia64_get_pfn_list(int xc_handle, uint32_t domid,
diff -r 7669fca80bfc tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Mon Dec 04 11:46:53 2006 -0500
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Dec 13 15:39:32 2006 -0500
@@ -461,6 +461,7 @@ class Common_XendDomainInfo:
         if self.state in (DOM_STATE_HALTED, DOM_STATE_SUSPENDED):
             try:
                 self._constructDomain()
+                self._allocMem2()
                 self._storeVmDetails()
                 self._createDevices()
                 self._createChannels()
@@ -1237,7 +1238,67 @@ class Common_XendDomainInfo:
         # Set maximum number of vcpus in domain
         xc.domain_max_vcpus(self.domid, int(self.info['vcpus']))
 
-
+    # Reserve and allocate the domain's memory: set the memory limit, size
+    # the shadow memory (the PowerPC hash table), then allocate the RMA and
+    # the remaining extents. maxmem, memory, and shadow are all in KiB.
+
+    def _allocMem2(self):
+        log.debug("allocMem2")
+
+        maxmem = self.info['maxmem'] * 1024
+        memory = self.info['memory'] * 1024
+        shadow = self.info['shadow_memory'] * 1024
+
+        # Round shadow up to a multiple of a MiB, as shadow_mem_control
+        # takes MiB and we must not round down and end up under-providing.
+        shadow = ((shadow + 1023) / 1024) * 1024
+
+        # set memory limit
+        xc.domain_setmaxmem(self.domid, maxmem)
+
+        # Make sure there's enough RAM available for the domain
+        balloon.free(memory + shadow)
+
+        # Set up the shadow memory, i.e. the PowerPC hash table
+        shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
+        self.info['shadow_memory'] = shadow_cur
+
+        rma_log = 26 ### self.info['powerpc_rma_log']
+        if rma_log == 0:
+            # use smallest RMA size available
+            rma_log = self.getRealModeLogs()[0]
+
+        if rma_log not in self.getRealModeLogs():
+            raise ValueError("rma_log(%d) must be one of %s"
+                             % (rma_log, self.getRealModeLogs()))
+
+        # store info for FlatDeviceTree
+        ### self.info['powerpc_rma_log'] = rma_log
+
+        rma_kb = (1 << rma_log) / 1024
+        if memory < rma_kb:
+            raise ValueError("Domain memory must be at least %d KB" % rma_kb)
+
+        if memory % (16 << 10):
+            raise ValueError("Domain memory %dKB must be a multiple of 16MB"
+                             % memory)
+
+        # allocate the RMA
+        log.debug("alloc_real_mode_area(%d, %d)", self.domid, rma_log)
+        xc.alloc_real_mode_area(self.domid, rma_log)
+
+        # now allocate the remaining memory as large-order allocations
+        memory -= rma_kb
+        extent_log = 24 # 16 MB
+        page_log = 12 # 4 KB
+        extent_order = extent_log - page_log
+        log.debug("increase_reservation(%d, 0x%x, %d)", self.domid,
+                  memory, extent_order)
+        xc.domain_memory_increase_reservation(self.domid,
+                                              memory,
+                                              extent_order)
+                
+ 
     def _introduceDomain(self):
         assert self.domid is not None
         assert self.store_mfn is not None
diff -r 7669fca80bfc xen/arch/powerpc/domain.c
--- a/xen/arch/powerpc/domain.c Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/domain.c Wed Dec 13 15:39:32 2006 -0500
@@ -152,7 +152,32 @@ void vcpu_destroy(struct vcpu *v)
 
 int arch_set_info_guest(struct vcpu *v, vcpu_guest_context_t *c)
 { 
+    int i;
+
     memcpy(&v->arch.ctxt, &c->user_regs, sizeof(c->user_regs));
+
+    for ( i = 0; i < NUM_SLB_ENTRIES; i++) {
+       memcpy(&v->arch.slb_entries[i], &c->slb_entries[i], sizeof(struct slb_entry));
+    }
+
+    for ( i = 0; i< 4; i++) v->arch.sprg[i] = c->sprg[i];
+
+    v->arch.timebase = c->timebase;
+    v->arch.dar = c->dar;
+    v->arch.dsisr = c->dsisr;
+
+    memcpy( &v->arch.cpu, &c->cpu, sizeof(struct cpu_vcpu));
+    v->arch.dec = c->dec;
+
+#ifdef HAS_FLOAT
+    memcpy( v->arch.fprs, c->fprs, sizeof(double)*NUM_FPRS);
+#endif /* HAS_FLOAT */
+
+#ifdef HAS_VMX
+    memcpy( &v->arch.vrs, &c->vrs, sizeof(vector128)*32);
+    memcpy( &v->arch.vscr, &c->vscr, sizeof(vector128));
+    v->arch.vrsave = c->vrsave;
+#endif /* HAS_VMX */
 
     printk("Domain[%d].%d: initializing\n",
            v->domain->domain_id, v->vcpu_id);
diff -r 7669fca80bfc xen/arch/powerpc/domctl.c
--- a/xen/arch/powerpc/domctl.c Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/domctl.c Wed Dec 13 15:39:32 2006 -0500
@@ -29,10 +29,37 @@
 #include <public/sysctl.h>
 #include <asm/processor.h>
 
+#define        DECOR   0x80000000      // indicates htab address
+
+
 void arch_getdomaininfo_ctxt(struct vcpu *, vcpu_guest_context_t *);
 void arch_getdomaininfo_ctxt(struct vcpu *v, vcpu_guest_context_t *c)
 { 
+    int i;
+
     memcpy(&c->user_regs, &v->arch.ctxt, sizeof(struct cpu_user_regs));
+    for (i = 0; i < NUM_SLB_ENTRIES; i++) {
+       memcpy(&c->slb_entries[i],&v->arch.slb_entries[i],sizeof(struct slb_entry));
+    }
+
+    for (i = 0; i < 4; i++) c->sprg[i] = v->arch.sprg[i] ;
+    c->timebase = v->arch.timebase;
+    c->dar = v->arch.dar;
+    c->dsisr = v->arch.dsisr;
+    memcpy(&c->cpu,&v->arch.cpu,sizeof(struct cpu_vcpu));
+    c->dec = v->arch.dec;
+
+#ifdef HAS_FLOAT
+    memcpy(c->fprs,v->arch.fprs,sizeof(double)*NUM_FPRS);
+#endif         /*  HAS_FLOAT */
+
+#ifdef HAS_VMX
+    memcpy(c->vrs, v->arch.vrs, sizeof(vector128)*32);
+    memcpy(&c->vscr, &v->arch.vscr, sizeof(vector128));
+    c->vrsave = v->arch.vrsave;
+#endif /* HAS_VMX */
+
+
     /* XXX fill in rest of vcpu_guest_context_t */
 }
 
@@ -108,6 +135,27 @@ long arch_do_domctl(struct xen_domctl *d
         }
     }
     break;
+    case XEN_DOMCTL_getshadowlist:
+    {
+       struct domain *d = find_domain_by_id(domctl->domain);
+       uint num_ptes;
+
+       ret = -EINVAL;
+       if ( d != NULL)
+       {
+          ret = 0;
+          
+          domctl->u.getshadowlist.htab_map = (uint64_t)(d->arch.htab.map);
+
+          num_ptes = 1UL << d->arch.htab.log_num_ptes;
+          domctl->u.getshadowlist.htab_num_ptes = num_ptes;
+       
+          copy_to_guest(u_domctl, domctl, 1);
+          put_domain(d);
+       }
+    }
+    break;
+
 
     default:
         ret = -ENOSYS;
diff -r 7669fca80bfc xen/arch/powerpc/mm.c
--- a/xen/arch/powerpc/mm.c     Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/arch/powerpc/mm.c     Wed Dec 13 15:39:32 2006 -0500
@@ -37,6 +37,8 @@
 #define MEM_LOG(_f, _a...) ((void)0)
 #endif
 
+#define        DECOR 0x80000000UL
+
 /* Frame table and its size in pages. */
 struct page_info *frame_table;
 unsigned long max_page;
@@ -408,6 +410,11 @@ ulong pfn2mfn(struct domain *d, ulong pf
     ulong foreign_map_pfn = 1UL << cpu_foreign_map_order();
 
     /* quick tests first */
+    if (pfn & DECOR)
+    {
+        mfn = pfn & ~DECOR;            //*** TBD Check for valid htab range?
+    }  
+    else
     if (pfn & foreign_map_pfn) {
         t = PFN_TYPE_FOREIGN;
         mfn = foreign_to_mfn(d, pfn);
diff -r 7669fca80bfc xen/include/asm-powerpc/domain.h
--- a/xen/include/asm-powerpc/domain.h  Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/asm-powerpc/domain.h  Wed Dec 13 15:39:32 2006 -0500
@@ -51,10 +51,6 @@ struct arch_domain {
     uint large_page_order[4];
 } __cacheline_aligned;
 
-struct slb_entry {
-    ulong slb_vsid;
-    ulong slb_esid;
-};
 #define SLB_ESID_VALID (1ULL << (63 - 36))
 #define SLB_ESID_CLASS (1ULL << (63 - 56))
 #define SLB_ESID_MASK  (~0ULL << (63 - 35))
@@ -63,9 +59,9 @@ struct slb_entry {
 
 struct xencomm;
 
-typedef struct {
-    u32 u[4];
-} __attribute__((aligned(16))) vector128;
+#ifdef HAS_VMX
+typedef  _vector128 vector128;
+#endif /* HAS_VMX */
 
 struct arch_vcpu {
     cpu_user_regs_t ctxt; /* User-level CPU registers */
diff -r 7669fca80bfc xen/include/asm-powerpc/htab.h
--- a/xen/include/asm-powerpc/htab.h    Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/asm-powerpc/htab.h    Wed Dec 13 15:39:32 2006 -0500
@@ -69,68 +69,68 @@
 
 union pte {
     struct pte_words {
-        ulong vsid;
-        ulong rpn;
+        uint64_t vsid;
+        uint64_t rpn;
     } words;
     struct pte_bits {
         /* *INDENT-OFF* */
         /* high word */
-        ulong avpn:     57; /* [0-56] abbreviated virtual page number */
-        ulong lock:     1;  /* [57] hypervisor lock bit */
-        ulong res:      1;  /* [58] reserved for hypervisor */
-        ulong bolted:   1;  /* [59] XXX software-reserved; temp hack */
-        ulong sw:       1;  /* [60] reserved for software */
-        ulong l:        1;  /* [61] Large Page */
-        ulong h:        1;  /* [62] hash function id */
-        ulong v:        1;  /* [63] valid */
+        uint64_t avpn:     57; /* [0-56] abbreviated virtual page number */
+        uint64_t lock:     1;  /* [57] hypervisor lock bit */
+        uint64_t res:      1;  /* [58] reserved for hypervisor */
+        uint64_t bolted:   1;  /* [59] XXX software-reserved; temp hack */
+        uint64_t sw:       1;  /* [60] reserved for software */
+        uint64_t l:        1;  /* [61] Large Page */
+        uint64_t h:        1;  /* [62] hash function id */
+        uint64_t v:        1;  /* [63] valid */
 
         /* low word */
-        ulong pp0:  1;  /* [0] page protection bit 0 (current PowerPC
+        uint64_t pp0:  1;  /* [0] page protection bit 0 (current PowerPC
                          *     specification says it can always be 0) */
-        ulong ts:   1;  /* [1] tag select */
-        ulong rpn:  50; /* [2-51] real page number */
-        ulong res2: 2;  /* [52,53] reserved */
-        ulong ac:   1;  /* [54] address compare */
-        ulong r:    1;  /* [55] referenced */
-        ulong c:    1;  /* [56] changed */
-        ulong w:    1;  /* [57] write through */
-        ulong i:    1;  /* [58] cache inhibited */
-        ulong m:    1;  /* [59] memory coherent */
-        ulong g:    1;  /* [60] guarded */
-        ulong n:    1;  /* [61] no-execute */
-        ulong pp1:  2;  /* [62,63] page protection bits 1:2 */
+        uint64_t ts:   1;  /* [1] tag select */
+        uint64_t rpn:  50; /* [2-51] real page number */
+        uint64_t res2: 2;  /* [52,53] reserved */
+        uint64_t ac:   1;  /* [54] address compare */
+        uint64_t r:    1;  /* [55] referenced */
+        uint64_t c:    1;  /* [56] changed */
+        uint64_t w:    1;  /* [57] write through */
+        uint64_t i:    1;  /* [58] cache inhibited */
+        uint64_t m:    1;  /* [59] memory coherent */
+        uint64_t g:    1;  /* [60] guarded */
+        uint64_t n:    1;  /* [61] no-execute */
+        uint64_t pp1:  2;  /* [62,63] page protection bits 1:2 */
         /* *INDENT-ON* */
     } bits;
 };
 
 union ptel {
-    ulong word;
+    uint64_t word;
     struct ptel_bits {
         /* *INDENT-OFF* */
 
-        ulong pp0:  1;  /* page protection bit 0 (current PPC
+        uint64_t pp0:  1;  /* page protection bit 0 (current PPC
                          *   AS says it can always be 0) */
-        ulong ts:   1;  /* tag select */
-        ulong rpn:  50; /* real page number */
-        ulong res2: 2;  /* reserved */
-        ulong ac:   1;  /* address compare */
-        ulong r:    1;  /* referenced */
-        ulong c:    1;  /* changed */
-        ulong w:    1;  /* write through */
-        ulong i:    1;  /* cache inhibited */
-        ulong m:    1;  /* memory coherent */
-        ulong g:    1;  /* guarded */
-        ulong n:    1;  /* no-execute */
-        ulong pp1:  2;  /* page protection bits 1:2 */
+        uint64_t ts:   1;  /* tag select */
+        uint64_t rpn:  50; /* real page number */
+        uint64_t res2: 2;  /* reserved */
+        uint64_t ac:   1;  /* address compare */
+        uint64_t r:    1;  /* referenced */
+        uint64_t c:    1;  /* changed */
+        uint64_t w:    1;  /* write through */
+        uint64_t i:    1;  /* cache inhibited */
+        uint64_t m:    1;  /* memory coherent */
+        uint64_t g:    1;  /* guarded */
+        uint64_t n:    1;  /* no-execute */
+        uint64_t pp1:  2;  /* page protection bits 1:2 */
         /* *INDENT-ON* */
     } bits;
 };
 
 struct domain_htab {
-    ulong sdr1;
+    uint64_t sdr1;
     uint log_num_ptes;  /* log number of PTEs in HTAB. */
     uint order;         /* order for freeing. */
     union pte *map;     /* access the htab like an array */
-    ulong *shadow;      /* idx -> logical translation array */
+    uint64_t *shadow;      /* idx -> logical translation array */
 };
 #endif
diff -r 7669fca80bfc xen/include/public/arch-powerpc.h
--- a/xen/include/public/arch-powerpc.h Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/public/arch-powerpc.h Wed Dec 13 15:39:32 2006 -0500
@@ -98,11 +98,66 @@ typedef struct cpu_user_regs cpu_user_re
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */
 
+#define        NUM_SLB_ENTRIES 64
+struct slb_entry {
+       uint64_t slb_vsid;
+       uint64_t slb_esid;
+};
+typedef struct slb_entry slb_entry_t;
+
+#ifndef HAS_VMX
+#define                 HAS_VMX 1
+#endif
+
+#ifndef HAS_FLOAT
+#define                 HAS_FLOAT 1
+#endif
+
+#ifdef HAS_VMX
+typedef struct {
+       uint32_t u[4];
+} __attribute__((aligned(16))) _vector128;
+#endif /* HAS_VMX */
+
+
 /* ONLY used to communicate with dom0! See also struct exec_domain. */
 struct vcpu_guest_context {
     cpu_user_regs_t user_regs;         /* User-level CPU registers     */
+    slb_entry_t        slb_entries[NUM_SLB_ENTRIES];   /* Segment Lookaside Buffer */
+
+    /* Special-Purpose Registers */
+    uint64_t sprg[4];
+    uint64_t timebase;
+    uint64_t dar;
+    uint64_t dsisr;
+
+    struct cpu_vcpu_tag {
+       uint64_t hid4;
+    } cpu; /* CPU-specific bits */
+
+    uint32_t dec;
+
+    /* XXX etc */
+#ifdef HAS_FLOAT
+#define  NUM_FPRS 32
+    double fprs[NUM_FPRS];
+#endif
+#ifdef HAS_VMX
+    _vector128 vrs[32];
+    _vector128 vscr;
+    uint32_t vrsave;
+#endif
+
+#if 0
+    struct xencomm *xencomm;
+
+    /* I/O-port access bitmap. */
+    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
+    int iobmp_limit;  /* Number of ports represented in the bitmap.  */
+    int iopl;         /* Current IOPL for this VCPU. */
+#endif
+
     uint64_t sdr1;                     /* Pagetable base               */
-    /* XXX etc */
 };
 typedef struct vcpu_guest_context vcpu_guest_context_t;
 DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
diff -r 7669fca80bfc xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Mon Dec 04 11:46:53 2006 -0500
+++ b/xen/include/public/domctl.h       Wed Dec 13 15:39:32 2006 -0500
@@ -392,6 +392,18 @@ typedef struct xen_domctl_real_mode_area
 typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
 
+#define XEN_DOMCTL_getshadowlist       29
+struct xen_domctl_getshadowlist {
+       /* OUT variables */
+       /* Start of htab array: arch_do_domctl() stores d->arch.htab.map here */
+       uint64_t htab_map;
+       /* Number of ptes within htab, i.e. 1 << d->arch.htab.log_num_ptes */
+       uint32_t htab_num_ptes;
+};
+
+typedef struct xen_domctl_getshadowlist        xen_domctl_getshadowlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t);
+ 
 struct xen_domctl {
     uint32_t cmd;
     uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
@@ -418,6 +430,7 @@ struct xen_domctl {
         struct xen_domctl_arch_setup        arch_setup;
         struct xen_domctl_settimeoffset     settimeoffset;
         struct xen_domctl_real_mode_area    real_mode_area;
+        struct xen_domctl_getshadowlist     getshadowlist;
         uint8_t                             pad[128];
     } u;
 };
diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_restore.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/xc_linux_restore.c  Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,312 @@
+/******************************************************************************
+ * xc_linux_restore.c
+ *
+ * Restore the state of a Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Rewritten for PPC:  Dan Poff <poff@xxxxxxxxxx>, Yi Ge <geyi@xxxxxxxxxx>
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <xen/asm/htab.h>
+
+#include "xg_private.h"
+
+#define DECOR 0x80000000                        // indicates htab address
+#define LOG_PTE_SIZE            4
+
+#define INVALID_MFN       (~0ULL)
+
+#define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10))
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+/* Read exactly count bytes from fd into buf, retrying reads interrupted by
+ * EINTR.  Returns 1 on success, 0 on EOF or read error before count bytes. */
+static ssize_t
+read_exact(int fd, void *buf, size_t count)
+{
+    size_t done = 0;        /* size_t: no signed/unsigned compare vs count */
+    unsigned char *b = buf;
+
+    while (done < count) {
+        ssize_t n = read(fd, &b[done], count - done);
+        if ((n == -1) && (errno == EINTR))
+            continue;
+        if (n <= 0)
+            break;
+        done += n;
+    }
+    return (done == count) ? 1 : 0;
+}
+
+/* Map guest frame mfn and fill it with the next PAGE_SIZE bytes of io_fd.
+ * Returns 0 on success, -1 on failure; a mapped page is always unmapped. */
+static int
+read_page(int xc_handle, int io_fd, uint32_t dom, xen_pfn_t mfn)
+{
+    int rc = 0;
+    void *mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                     PROT_READ|PROT_WRITE, mfn);
+    if (mem == NULL) {
+        ERROR("cannot map page");
+        return -1;
+    }
+    if (!read_exact(io_fd, mem, PAGE_SIZE)) {
+        ERROR("Error when reading from state file (5)");
+        rc = -1;    /* fall through so the mapping is not leaked */
+    }
+    munmap(mem, PAGE_SIZE);
+    return rc;
+}
+
+/* Restore domain dom from the save image on io_fd: guest pages first, then
+ * the htab, the vcpu 0 context and the shared-info page.  Returns 0 on
+ * success (with *store_mfn and *console_mfn set) and 1 on failure; every
+ * failure routed through 'out' also destroys the domain when dom != 0. */
+int
+xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+                 unsigned long nr_pfns, unsigned int store_evtchn,
+                 unsigned long *store_mfn, unsigned int console_evtchn,
+                 unsigned long *console_mfn)
+{
+    DECLARE_DOMCTL;
+    int rc = 1, i;
+    xen_pfn_t pfn;
+    xen_pfn_t mfn = INVALID_MFN;
+    unsigned long ver;
+
+    /* The new domain's shared-info frame number, and a mapping of it. */
+    unsigned long shared_info_frame;
+    shared_info_t *shared_info;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    xen_pfn_t shared_info_pfn, *page_array = NULL;
+    char *temp = NULL;          /* htab bounce buffer, freed at 'out' */
+
+    /* A temporary mapping of the guest's start_info page. */
+    start_info_t *start_info;
+
+    max_pfn = nr_pfns;
+    DPRINTF("xc_linux_restore start: max_pfn = %ld\n", max_pfn);
+
+    if (!read_exact(io_fd, &ver, sizeof(unsigned long))) {
+        ERROR("Error when reading version");
+        goto out;
+    }
+    if (ver != 1) {
+        ERROR("version of save doesn't match");
+        goto out;
+    }
+
+    if (mlock(&ctxt, sizeof(ctxt))) {
+        /* needed for build domctl, but might as well do early */
+        ERROR("Unable to mlock ctxt");
+        return 1;
+    }
+
+    /* Get the domain's shared-info frame. */
+    domctl.cmd = XEN_DOMCTL_getdomaininfo;
+    domctl.domain = (domid_t)dom;
+    if (xc_domctl(xc_handle, &domctl) < 0) {
+        ERROR("Could not get information on new domain");
+        goto out;
+    }
+    shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
+
+    if (xc_domain_setmaxmem(xc_handle, dom, PFN_TO_KB(max_pfn)) != 0) {
+        errno = ENOMEM;
+        goto out;
+    }
+
+    /* Get pages.  */
+    page_array = malloc(max_pfn * sizeof(xen_pfn_t));
+    if (page_array == NULL ) {
+        ERROR("Could not allocate memory");
+        goto out;
+    }
+
+    if (xc_get_pfn_list(xc_handle, dom,
+                            page_array, max_pfn) != max_pfn) {
+        ERROR("Could not get the page frame list");
+        goto out;
+    }
+
+    DPRINTF("Reloading memory pages:   0%%\n");
+
+    while (1) {
+        if (!read_exact(io_fd, &pfn, sizeof(xen_pfn_t))) {
+            ERROR("Error when reading batch size");
+            goto out;
+        }
+
+        if (pfn == INVALID_MFN)
+            break;
+
+        /* page_array[] has max_pfn entries, so pfn == max_pfn is invalid */
+        if (pfn >= max_pfn){
+            DPRINTF("pfn: 0x%016llx\n", pfn);
+            continue;
+        }
+
+        mfn = page_array[pfn];
+
+        if (read_page(xc_handle, io_fd, dom, mfn) < 0)
+            goto out;
+    }
+
+    DPRINTF("Received all pages\n");
+
+    /* Read and uncanonicalise htab, page-at-a-time */
+    {
+        int N;
+        int num_ptes, htab_ptes, htab_pages;
+        unsigned long htab_mfn;
+        uint64_t htab_raddr;
+        xen_pfn_t htab_rpn;
+        union pte *ppte;
+        char *mem, *copy;
+
+        htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr);
+        if (htab_ptes == -1){
+            ERROR("Could not get the shadow list");
+            goto out;
+        }
+
+        if (!read_exact(io_fd, &num_ptes, sizeof(num_ptes))) {
+            ERROR("Error when reading num_ptes");
+            goto out;
+        }
+
+        if (num_ptes != htab_ptes){
+            ERROR("num_ptes != htab_ptes:  %d  %d   htab_raddr: 0x%016llx",
+                num_ptes, htab_ptes, (unsigned long long)htab_raddr);
+            goto out;
+        }
+
+        temp = malloc(PAGE_SIZE * 2);
+        if (temp == NULL){
+            ERROR("Could not allocate temp memory");
+            goto out;
+        }
+
+        copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1)));
+        htab_mfn = htab_raddr >> PAGE_SHIFT;
+        htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE));
+
+        /* Replace guest pfn with rfn, then copy to htab, by page */
+        for (N = 0; N < htab_pages;  N++, htab_mfn++) {
+            /* Read and translate first: failing here leaks no mapping. */
+            if (!read_exact(io_fd, copy, PAGE_SIZE)) {
+                ERROR("Error when reading htab page");
+                goto out;
+            }
+
+            ppte = (union pte *)copy;
+            for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){
+                if (ppte->bits.v == 1){                 // valid htab entry
+                    htab_rpn = ppte->bits.rpn;          // guest's pfn
+                    if (htab_rpn >= max_pfn){
+                        ERROR("htab_rpn: 0x%016llx not found in page_array[]",
+                            htab_rpn);
+                        goto out;
+                    }
+                    ppte->bits.rpn = page_array[htab_rpn]; // guest's rpn
+                } else {                                // invalid htab entry
+                    ppte->words.rpn = 0;
+                }
+            }
+
+            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, htab_mfn | DECOR);
+            if (mem == NULL){
+                ERROR("Cannot map htab_mfn 0x%08lx: %s\n",
+                    htab_mfn, strerror (errno));
+                goto out;
+            }
+            memcpy(mem, copy, PAGE_SIZE);
+            munmap(mem, PAGE_SIZE);
+        }
+    }
+
+    /* Read vcpu context and set */
+    if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) {
+        ERROR("Error when reading ctxt");
+        goto out;
+    }
+
+    domctl.cmd = XEN_DOMCTL_setvcpucontext;
+    domctl.domain = (domid_t)dom;
+    domctl.u.vcpucontext.vcpu   = 0;
+    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
+
+    if (xc_domctl(xc_handle, &domctl) != 0) {
+        ERROR("Couldn't set vcpu context");
+        goto out;
+    }
+
+    /* Read shared info.  */
+    shared_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, shared_info_frame);
+    if (shared_info == NULL) {
+        ERROR("cannot map page");
+        goto out;
+    }
+    if (!read_exact(io_fd, shared_info, PAGE_SIZE)) {
+        ERROR("Error when reading shared_info page");
+        munmap(shared_info, PAGE_SIZE);
+        goto out;
+    }
+
+    /* clear any pending events */
+    memset(&(shared_info->evtchn_pending[0]), 0,
+           sizeof (shared_info->evtchn_pending));
+    for (i = 0; i < MAX_VIRT_CPUS; i++)
+        shared_info->vcpu_info[i].evtchn_pending_sel = 0;
+    munmap (shared_info, PAGE_SIZE);
+
+    /* NOTE(review): start_info is assumed to sit 3 frames below shared_info */
+    mfn = shared_info_frame - 3 ;
+
+    for (i = 0; i < max_pfn; i++)              // find pfn of shared_info_frame
+        if (page_array[i] == shared_info_frame) break;
+    if ( i >= max_pfn) {
+        ERROR("Cannot find pfn of shared_info_frame");
+        goto out;
+    }
+    shared_info_pfn = (unsigned long) i;
+
+    /* Setup start_info page */
+    start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ | PROT_WRITE, mfn);
+    if (start_info == NULL) {
+        ERROR("cannot map start_info page");
+        goto out;
+    }
+    start_info->nr_pages = max_pfn;
+    start_info->shared_info =  shared_info_pfn << PAGE_SHIFT;
+    start_info->flags = 0;
+    *store_mfn = page_array[start_info->store_mfn];
+    start_info->store_evtchn = store_evtchn;
+    *console_mfn = page_array[start_info->console.domU.mfn];
+    start_info->console.domU.evtchn = console_evtchn;
+    munmap(start_info, PAGE_SIZE);
+
+    DPRINTF("Domain ready to be built.\n");
+    rc = 0;
+
+ out:
+    if ((rc != 0) && (dom != 0))
+        xc_domain_destroy(xc_handle, dom);
+
+    free (temp);
+    free (page_array);
+    safe_munlock(&ctxt, sizeof(ctxt));
+    DPRINTF("Restore exit with rc=%d\n", rc);
+    return rc;
+}
diff -r 7669fca80bfc tools/libxc/powerpc64/xc_linux_save.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/powerpc64/xc_linux_save.c     Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,417 @@
+/******************************************************************************
+ * xc_linux_save.c
+ *
+ * Save the state of a running Linux session.
+ *
+ * Copyright (c) 2003, K A Fraser.
+ * Rewritten for PPC:  Dan Poff <poff@xxxxxxxxxx>, Yi Ge <geyi@xxxxxxxxxx>
+ */
+
+#include <inttypes.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <xen/asm/htab.h>
+
+#include "xg_private.h"
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_linux_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS    (4 - 1)                /* limit us to 4 times round loop  */
+#define DEF_MAX_FACTOR   3                              /* never send more than 3x nr_pfns */
+
+/*
+** During (live) save/migrate, we maintain a number of bitmaps to track
+** which pages we have to send, and to skip.
+*/
+
+#define DECOR 0x80000000                                // indicates htab address
+#define LOG_PTE_SIZE            4
+
+#define INVALID_MFN       (~0ULL)
+
+/* total number of pages used by the current guest */
+static unsigned long max_pfn;
+
+static int
+suspend_and_state(int (*suspend)(int), int xc_handle, int io_fd,
+                  int dom, xc_dominfo_t *info)
+{
+    int i = 0;
+
+    if (!(*suspend)(dom)) {
+        ERROR("Suspend request failed");
+        return -1;
+    }
+
+retry:
+    if (xc_domain_getinfo(xc_handle, dom, 1, info) != 1) {
+        ERROR("Could not get domain info");
+        return -1;
+    }
+
+    if (info->shutdown && info->shutdown_reason == SHUTDOWN_suspend)
+        return 0; // success
+
+    if (info->paused) {
+        // try unpausing domain, wait, and retest
+        xc_domain_unpause(xc_handle, dom);
+
+        ERROR("Domain was paused. Wait and re-test.");
+        usleep(10000);  // 10ms
+
+        goto retry;
+    }
+
+
+    if(++i < 100) {
+        ERROR("Retry suspend domain.");
+        usleep(10000);  // 10ms
+        goto retry;
+    }
+
+    ERROR("Unable to suspend domain.");
+
+    return -1;
+}
+
+static inline ssize_t
+write_exact(int fd, void *buf, size_t count)
+{
+    if (write(fd, buf, count) != count)
+        return 0;
+    return 1;
+}
+
+int
+xc_linux_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
+              uint32_t max_factor, uint32_t flags, int (*suspend)(int))
+{
+    xc_dominfo_t info;
+
+    int rc = 1;
+    int debug = 0;
+
+    /* The new domain's shared-info frame number. */
+    unsigned long shared_info_frame;
+
+    /* A copy of the CPU context of the guest. */
+    vcpu_guest_context_t ctxt;
+
+    xen_pfn_t *page_array = NULL;
+
+    /* Live mapping of shared info structure */
+    shared_info_t *live_shinfo = NULL;
+
+    if (xc_domain_getinfo(xc_handle, dom, 1, &info) != 1) {
+        ERROR("Could not get domain info");
+        return 1;
+    }
+
+    shared_info_frame = info.shared_info_frame;
+
+    /* Map the shared info frame */
+    live_shinfo = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ, shared_info_frame);
+    if (!live_shinfo) {
+        ERROR("Couldn't map live_shinfo");
+        goto out;
+    }
+
+    max_pfn = info.max_memkb >> (PAGE_SHIFT - 10);
+
+    page_array = malloc(max_pfn * sizeof(xen_pfn_t));
+    if (page_array == NULL) {
+        ERROR("Could not allocate memory");
+        goto out;
+    }
+
+    /* This is expected by xm restore.  */
+    if (!write_exact(io_fd, &max_pfn, sizeof(unsigned long))) {
+        ERROR("write: max_pfn");
+        goto out;
+    }
+
+    /* xc_linux_restore starts to read here.  */
+    /* Write a version number.  This can avoid searching for a stupid bug
+       if the format changes.
+       The version is hard-coded, don't forget to change the restore code
+       too!  */
+    {
+        unsigned long version = 1;
+
+        if (!write_exact(io_fd, &version, sizeof(unsigned long))) {
+            ERROR("write: version");
+            goto out;
+        }
+    }
+
+    /* This is a non-live suspend. Issue the call back to get the
+       domain suspended */
+    
+    if (suspend_and_state(suspend, xc_handle, io_fd, dom, &info)) {
+        ERROR("Domain appears not to have suspended");
+        goto out;
+    }
+    
+
+    {
+        char *mem;
+        xen_pfn_t pfn;
+        unsigned int total_sent = 0;
+
+        if (xc_get_pfn_list(xc_handle, dom, 
+                            page_array, max_pfn) != max_pfn) {
+            ERROR("Could not get the page frame list");
+            goto out;
+        }
+
+        /* Start writing out the saved-domain record. */
+        for (pfn = 0; pfn < max_pfn; pfn++){
+            if (page_array[pfn] == INVALID_MFN)
+                continue;
+
+            if (debug)
+                fprintf(stderr, "xc_linux_save: page %llx (%llu/%lu)\n",
+                        page_array[pfn], pfn, max_pfn);
+
+            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, page_array[pfn]);
+            if (mem == NULL) {
+                ERROR("cannot map page %llx: %s",
+                        page_array[pfn], strerror (errno));
+                goto out;
+            }
+
+            if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+                ERROR("Error when writing to state file (4)");
+                goto out;
+            }
+
+            if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) {
+                ERROR("Error when writing to state file (5)");
+                goto out;
+            }
+            munmap(mem, PAGE_SIZE);
+            total_sent++;
+        }
+    }
+
+    DPRINTF("All memory is saved\n");
+
+    /* terminate memory dump */
+    {
+        xen_pfn_t pfn = INVALID_MFN;
+        if (!write_exact(io_fd, &pfn, sizeof(pfn))) {
+            ERROR("Error when writing to state file (6)");
+            goto out;
+        }
+    }
+    
+    /* Canonicalize htab and save */
+    {
+        int i, k, n;
+        int N, total_sent = 0;
+        int htab_ptes, htab_pages, n_chunks;
+        unsigned long htab_mfn;
+        uint64_t htab_raddr;
+        xen_pfn_t htab_rpn, pfn = 0;
+        union pte *ppte;
+        char *mem, *temp, *copy;
+        unsigned long long sizes;
+        struct chunk_array {xen_pfn_t mfn; unsigned long long size;} *p_chunk;
+
+        n_chunks = 1;
+        for (k = 0; k < max_pfn - 1; k++){     // find number of chunks
+            if (page_array[k] + 1 != page_array[k+1]){
+                n_chunks += 1;
+            }
+        }
+
+        p_chunk = malloc(n_chunks * sizeof(struct chunk_array));
+        if (p_chunk == NULL) {
+            ERROR("Could not allocate memory for chunk_array");
+            goto out;
+        }
+
+        k = 0; n = 0;
+        p_chunk[n].mfn = page_array[k];
+        p_chunk[n].size = 1;
+
+        for (k = 0; k < max_pfn - 1; k++){     // record mfn for start of each chunk, size
+            if (page_array[k] + 1 != page_array[k+1]){
+                p_chunk[n+1].mfn = page_array[k+1];
+                p_chunk[n+1].size = 1;
+                n += 1;
+            } else {
+                p_chunk[n].size += 1;
+            }
+        }
+#if 0
+        DPRINTF("n_chunks: %d\n", n_chunks);
+        for (i = 0; i < n_chunks; i++){
+            DPRINTF("0x%016llx  0x%016llx\n", p_chunk[i].mfn, p_chunk[i].size);
+        }
+#endif
+        htab_ptes = xc_get_shadow_list(xc_handle, dom, &htab_raddr);
+        if (htab_ptes == -1){ 
+            ERROR("Could not get the shadow list");
+            goto out;
+        }
+
+        temp = malloc(PAGE_SIZE * 2);
+        if (temp == NULL){
+            ERROR("Could not allocate temp memory");
+            goto out;
+        }
+
+        copy = (char *)(((ulong)temp + (PAGE_SIZE - 1)) & (~(PAGE_SIZE-1)));
+
+        htab_mfn = htab_raddr >> PAGE_SHIFT;
+        htab_pages = htab_ptes / (PAGE_SIZE / (1UL << LOG_PTE_SIZE));
+        // DPRINTF("htab_pages: 0x%08lx htab_addr: %llx  htab_mfn %lx\n",
+        // htab_pages,htab_raddr, htab_mfn);
+
+        if (!write_exact(io_fd, &htab_ptes, sizeof(htab_ptes))) {
+            ERROR("Error when writing to state file (6)");
+            goto out;
+        }
+
+        /* Replace rpn with guest pfn, then write out htab, by page */
+        for (N = 0; N < htab_pages;  N++, htab_mfn++) {
+            mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                       PROT_READ|PROT_WRITE, htab_mfn | DECOR);
+            if (mem == NULL){
+                ERROR("Cannot map htab_mfn 0x%08lx: %s\n", 
+                    htab_mfn, strerror (errno));
+                goto out;
+            }
+
+            memcpy(copy, mem, PAGE_SIZE);
+
+            /* Improved search of page_array[] for htab_rpn - by chunks*/
+            ppte = (union pte *)copy;
+            for (i = 0; i < (PAGE_SIZE / (1UL << LOG_PTE_SIZE)); i++, ppte++){
+                if (ppte->bits.v == 1){                                // valid htab entry
+                    sizes = 0;
+                    htab_rpn = ppte->bits.rpn;
+                    for (n = 0; n < n_chunks; n++){    // search by memory chunk
+                        if ((htab_rpn >= p_chunk[n].mfn) &&
+                            (htab_rpn < (p_chunk[n].mfn) + p_chunk[n].size)){
+                            pfn = (htab_rpn - p_chunk[n].mfn) + sizes;
+                            break;
+                        } else {
+                            sizes += p_chunk[n].size;
+                        }
+                    }
+
+                    if (n >= n_chunks){
+                        ERROR("htab_rpn: 0x%016llx not found in page_array[]",
+                            htab_rpn);
+                        goto out;
+                    }
+
+                    if (pfn >= max_pfn){
+                        ERROR("pfn >= max_pfn: 0x%08llx 0x%08lx", pfn, max_pfn);
+                        goto out;
+                    }
+
+//***  validation                                              
+                    for (k = 0; k < max_pfn; k++){             // linear search
+                        if (htab_rpn == page_array[k])
+                            break;
+                    }
+
+                    if (k != pfn){
+                        ERROR("k != pfn: 0x%08x 0x%08llx", k, pfn);
+                             ERROR("htab_rpn: 0x%016llx", htab_rpn);
+                        goto out;
+                    }
+
+                    if (k >= max_pfn){
+                        ERROR("htab_rpn: 0x%016llx not found in page_array[] %d",
+                            htab_rpn, i);
+                        goto out;
+                    }
+//***
+
+                    ppte->bits.rpn = pfn;
+                } else {                                                       // invalid htab entry
+                    ppte->words.rpn = 0;
+                }
+            }
+
+            if (write(io_fd, copy, PAGE_SIZE) != PAGE_SIZE) {
+                ERROR("Error when writing to state file (7)");
+                goto out;
+            }
+
+            munmap(mem, PAGE_SIZE);
+            total_sent++;
+        }
+
+        free(temp);
+        free(p_chunk);
+    }
+        
+    /* save vcpu context only for vcpu 0;                                   */
+    /* linux already suspended other vcpus via smp_suspend() */
+    if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) {
+        ERROR("Could not get vcpu context");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
+        ERROR("Error when writing to state file (1)");
+        goto out;
+    }
+
+    if (!write_exact(io_fd, live_shinfo, PAGE_SIZE)) {
+        ERROR("Error when writing to state file (1)");
+        goto out;
+    }
+
+    /* Success! */
+    rc = 0;
+
+#if 0
+    DPRINTF("Domain ready to be built.\n");
+
+    domctl.cmd = XEN_DOMCTL_setvcpucontext;
+    domctl.domain = (domid_t)dom;
+    domctl.u.vcpucontext.vcpu   = 0;
+    set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt);
+    rc = xc_domctl(xc_handle, &domctl);
+
+    if (rc != 0) {
+        ERROR("Couldn't build the domain");
+        goto out;
+    }
+#endif
+
+ out:
+
+    free(page_array);
+    if (live_shinfo)
+        munmap(live_shinfo, PAGE_SIZE);
+
+    DPRINTF("Save exit rc=%d\n",rc);
+
+    return !!rc;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 7669fca80bfc tools/libxc/xen/asm/htab.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/libxc/xen/asm/htab.h        Wed Dec 13 15:39:32 2006 -0500
@@ -0,0 +1,134 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) IBM Corp. 2005
+ *
+ * Authors: Hollis Blanchard <hollisb@xxxxxxxxxx>
+ */
+
+#ifndef _ASM_HTAB_H_
+#define _ASM_HTAB_H_
+
+
+/***** general PowerPC architecture limits ******/
+
+/* 256KB, from PowerPC Architecture specification */
+#define HTAB_MIN_LOG_SIZE 18
+
+#define LOG_NUM_PTES_IN_PTEG    3
+#define NUM_PTES_IN_PTEG        (1 << LOG_NUM_PTES_IN_PTEG)
+#define LOG_PTE_SIZE            4
+#define LOG_PTEG_SIZE           (LOG_NUM_PTES_IN_PTEG + LOG_PTE_SIZE)
+#define LOG_HTAB_HASH           (LOG_HTAB_SIZE - LOG_PTEG_SIZE)
+
+/* real page number shift to create the rpn field of the pte */
+#define RPN_SHIFT 12
+
+/* page protection bits in pp1 (name format: MSR:PR=0 | MSR:PR=1) */
+#define PP_RWxx 0x0UL
+#define PP_RWRW 0x2UL
+#define PP_RWRx 0x4UL
+#define PP_RxRx 0x6UL
+
+/***** 64-bit PowerPC architecture limits ******/
+
+#define SDR1_HTABORG_MASK   0xfffffffffff80000ULL
+#define SDR1_HTABSIZE_MASK  0x1fUL
+#define SDR1_HTABSIZE_MAX   46
+#define SDR1_HTABSIZE_BASEBITS 11
+
+/* used to turn a vsid into a number usable in the hash function */
+#define VSID_HASH_MASK 0x0000007fffffffffUL
+
+/* used to turn a vaddr into an api for a pte */
+#define VADDR_TO_API(vaddr) (((vaddr) & API_MASK) >> API_SHIFT)
+#define API_VEC   0x1fUL
+#define API_SHIFT 23
+#define API_MASK  (API_VEC << API_SHIFT)
+
+/***** hypervisor internals ******/
+
+/* 64M: reasonable hypervisor limit? */
+#define HTAB_MAX_LOG_SIZE 26
+
+#define GET_HTAB(domain) ((domain)->arch.htab.sdr1 & SDR1_HTABORG_MASK)
+
+union pte {
+    struct pte_words {
+        uint64_t vsid;
+        uint64_t rpn;
+    } words;
+    struct pte_bits {
+        /* *INDENT-OFF* */
+        /* high word */
+        uint64_t avpn:     57; /* [0-56] abbreviated virtual page number */
+        uint64_t lock:     1;  /* [57] hypervisor lock bit */
+        uint64_t res:      1;  /* [58] reserved for hypervisor */
+        uint64_t bolted:   1;  /* [59] XXX software-reserved; temp hack */
+        uint64_t sw:       1;  /* [60] reserved for software */
+        uint64_t l:        1;  /* [61] Large Page */
+        uint64_t h:        1;  /* [62] hash function id */
+        uint64_t v:        1;  /* [63] valid */
+
+        /* low word */
+        uint64_t pp0:  1;  /* [0] page protection bit 0 (current PowerPC
+                         *     specification says it can always be 0) */
+        uint64_t ts:   1;  /* [1] tag select */
+        uint64_t rpn:  50; /* [2-51] real page number */
+        uint64_t res2: 2;  /* [52,53] reserved */
+        uint64_t ac:   1;  /* [54] address compare */
+        uint64_t r:    1;  /* [55] referenced */
+        uint64_t c:    1;  /* [56] changed */
+        uint64_t w:    1;  /* [57] write through */
+        uint64_t i:    1;  /* [58] cache inhibited */
+        uint64_t m:    1;  /* [59] memory coherent */
+        uint64_t g:    1;  /* [60] guarded */
+        uint64_t n:    1;  /* [61] no-execute */
+        uint64_t pp1:  2;  /* [62,63] page protection bits 1:2 */
+        /* *INDENT-ON* */
+    } bits;
+};
+
+union ptel {
+    uint64_t word;
+    struct ptel_bits {
+        /* *INDENT-OFF* */
+
+        uint64_t pp0:  1;  /* page protection bit 0 (current PPC
+                         *   AS says it can always be 0) */
+        uint64_t ts:   1;  /* tag select */
+        uint64_t rpn:  50; /* real page number */
+        uint64_t res2: 2;  /* reserved */
+        uint64_t ac:   1;  /* address compare */
+        uint64_t r:    1;  /* referenced */
+        uint64_t c:    1;  /* changed */
+        uint64_t w:    1;  /* write through */
+        uint64_t i:    1;  /* cache inhibited */
+        uint64_t m:    1;  /* memory coherent */
+        uint64_t g:    1;  /* guarded */
+        uint64_t n:    1;  /* no-execute */
+        uint64_t pp1:  2;  /* page protection bits 1:2 */
+        /* *INDENT-ON* */
+    } bits;
+};
+
+struct domain_htab {
+    uint64_t sdr1;
+    uint log_num_ptes;  /* log number of PTEs in HTAB. */
+    uint order;         /* order for freeing. */
+    union pte *map;     /* access the htab like an array */
+    uint64_t *shadow;      /* idx -> logical translation array */
+};
+#endif


Linux diffs:

diff -r c8d1f32fd7de arch/powerpc/platforms/xen/hcall.c
--- a/arch/powerpc/platforms/xen/hcall.c        Wed Nov 22 14:51:54 2006 -0500
+++ b/arch/powerpc/platforms/xen/hcall.c        Wed Dec 13 15:54:20 2006 -0500
@@ -256,6 +256,7 @@ static int xenppc_privcmd_domctl(privcmd
        case XEN_DOMCTL_pausedomain:
        case XEN_DOMCTL_unpausedomain:
        case XEN_DOMCTL_getdomaininfo:
+       case XEN_DOMCTL_getshadowlist:
                break;
        case XEN_DOMCTL_getmemlist:
                ret = xencomm_create(
diff -r c8d1f32fd7de arch/powerpc/platforms/xen/reboot.c
--- a/arch/powerpc/platforms/xen/reboot.c       Wed Nov 22 14:51:54 2006 -0500
+++ b/arch/powerpc/platforms/xen/reboot.c       Wed Dec 13 15:54:20 2006 -0500
@@ -1,10 +1,20 @@
 #include <linux/module.h>
+#include <linux/kernel.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/io/console.h>
 #include <xen/xencons.h>
+#include <xen/cpu_hotplug.h>
+#include <xen/xenbus.h>
+#include <xen/gnttab.h>
+#include <xen/evtchn.h>
 #include <asm/hypervisor.h>
 #include <asm/machdep.h>
+#include <asm/mmu_context.h>
 
+#define  SHUTDOWN_INVALID               -1
+
+extern int shutting_down ;
+ 
 static void domain_machine_restart(char * __unused)
 {
        /* We really want to get pending console data out before we die. */
@@ -31,3 +41,86 @@ void xen_reboot_init(struct machdep_call
                ppc_md.halt      = domain_machine_power_off;
        }
 }
+
+static void switch_idle_mm(void)
+{
+                struct mm_struct *mm = current->active_mm;
+
+                if (mm == &init_mm)
+                                return;
+
+                atomic_inc(&init_mm.mm_count);
+                switch_mm(mm, &init_mm, current);
+                current->active_mm = &init_mm;
+                mmdrop(mm);
+}
+
+int ppc_do_suspend(void *ignore)
+{
+                int  err;
+                enum system_states temp_state;
+
+                BUG_ON(smp_processor_id() != 0);
+                BUG_ON(in_interrupt());
+
+#ifndef CONFIG_PPC_XEN
+                if (xen_feature(XENFEAT_auto_translated_physmap)) {
+                                printk(KERN_WARNING "Cannot suspend in "
+                                       "auto_translated_physmap mode.\n");
+                                return -EOPNOTSUPP;
+                }
+#endif
+                err = smp_suspend();
+                if (err)
+                    return err;
+
+                xenbus_suspend();
+
+                preempt_disable();
+
+                local_irq_disable();
+                
+                temp_state = system_state;
+                system_state = SYSTEM_SUSPEND_DISK;
+
+                preempt_enable();
+
+                gnttab_suspend();
+
+                HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+
+                xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+                xen_start_info->console_mfn = mfn_to_pfn(xen_start_info->console_mfn);
+
+                /*
+                 * We'll stop somewhere inside this hypercall. When it returns,
+                 * we'll start resuming after the restore.
+                 */
+                HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+                shutting_down = SHUTDOWN_INVALID;
+                
+                HYPERVISOR_shared_info = (shared_info_t *)__va(xen_start_info->shared_info);
+                memset(empty_zero_page, 0, PAGE_SIZE);
+
+                gnttab_resume();
+
+                irq_resume();
+
+                switch_idle_mm();
+
+                system_state = temp_state;
+                
+                per_cpu(last_jiffy, smp_processor_id()) = get_tbl();   /* update time base */
+
+                local_irq_enable();
+
+                xencons_resume();
+
+                xenbus_resume();
+
+                smp_resume();
+
+                return err;
+}
+
diff -r c8d1f32fd7de drivers/xen/core/reboot.c
--- a/drivers/xen/core/reboot.c Wed Nov 22 14:51:54 2006 -0500
+++ b/drivers/xen/core/reboot.c Wed Dec 13 15:54:20 2006 -0500
@@ -7,16 +7,16 @@
 #include <linux/reboot.h>
 #include <linux/sysrq.h>
 #include <linux/stringify.h>
-#include <asm/irq.h>
-#include <asm/mmu_context.h>
-#include <xen/evtchn.h>
-#include <asm/hypervisor.h>
-#include <xen/xenbus.h>
 #include <linux/cpu.h>
 #include <linux/kthread.h>
+#include <xen/evtchn.h>
+#include <xen/xenbus.h>
 #include <xen/gnttab.h>
 #include <xen/xencons.h>
 #include <xen/cpu_hotplug.h>
+#include <asm/irq.h>
+#include <asm/mmu_context.h>
+#include <asm/hypervisor.h>
 
 #if defined(__i386__) || defined(__x86_64__)
 /*
@@ -79,7 +79,7 @@ EXPORT_SYMBOL(machine_power_off);
  */
 
 /* Ignore multiple shutdown requests. */
-static int shutting_down = SHUTDOWN_INVALID;
+int shutting_down = SHUTDOWN_INVALID;
 static void __shutdown_handler(void *unused);
 static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
 
@@ -192,13 +192,14 @@ static int __do_suspend(void *ignore)
 
        return err;
 }
-#else  /* CONFIG_PPC_XEN */
+
+#else
+int ppc_do_suspend(void *ignore);
 static int __do_suspend(void *ignore)
 {
-       printk("SUSPEND!!??\n");
-       return 0;
-}
-#endif  /* CONFIG_PPC_XEN */
+       return ppc_do_suspend(ignore);
+}
+#endif
 
 static int shutdown_process(void *__unused)
 {
diff -r c8d1f32fd7de include/asm-powerpc/xen/asm/hypercall.h
--- a/include/asm-powerpc/xen/asm/hypercall.h   Wed Nov 22 14:51:54 2006 -0500
+++ b/include/asm-powerpc/xen/asm/hypercall.h   Wed Dec 13 15:54:20 2006 -0500
@@ -60,6 +60,16 @@ static inline int HYPERVISOR_shutdown(un
        return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
 }
 
+
+static inline int HYPERVISOR_suspend(unsigned long srec)
+{
+        struct sched_shutdown sched_shutdown = {
+               .reason = SHUTDOWN_suspend
+       };
+
+       return HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
+}
+
 static inline int HYPERVISOR_set_timer_op(unsigned long arg)
 {
        return plpar_hcall_norets(XEN_MARK(__HYPERVISOR_set_timer_op), arg);
diff -r c8d1f32fd7de include/xen/interface/arch-powerpc.h
--- a/include/xen/interface/arch-powerpc.h      Wed Nov 22 14:51:54 2006 -0500
+++ b/include/xen/interface/arch-powerpc.h      Wed Dec 13 15:54:20 2006 -0500
@@ -29,7 +29,6 @@
 
 #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
 #define XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
-#define XEN_GUEST_HANDLE_64(name)     __guest_handle_ ## name
 #define set_xen_guest_handle(hnd, val) \
     do { \
         if (sizeof ((hnd).__pad)) \
@@ -42,9 +41,6 @@
 #endif
 
 #ifndef __ASSEMBLY__
-
-typedef uint64_t uint64_aligned_t;
-
 /* Guest handles for primitive C types. */
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
@@ -98,9 +94,65 @@ typedef struct cpu_user_regs cpu_user_re
 
 typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */
 
+#define        NUM_SLB_ENTRIES 64
+struct slb_entry {
+       uint64_t slb_vsid;
+       uint64_t slb_esid;
+};
+typedef struct slb_entry slb_entry_t;
+
+#ifndef HAS_VMX
+#define                 HAS_VMX 1
+#endif
+
+#ifndef HAS_FLOAT
+#define                 HAS_FLOAT 1
+#endif
+
+#ifdef HAS_VMX
+typedef struct {
+       uint32_t u[4];
+} __attribute__((aligned(16))) _vector128;
+#endif /* HAS_VMX */
+
+
 /* ONLY used to communicate with dom0! See also struct exec_domain. */
 struct vcpu_guest_context {
     cpu_user_regs_t user_regs;         /* User-level CPU registers     */
+    slb_entry_t        slb_entries[NUM_SLB_ENTRIES];   /* Segment Lookaside Buffer */
+
+    /* Special-Purpose Registers */
+    uint64_t sprg[4];
+    uint64_t timebase;
+    uint64_t dar;
+    uint64_t dsisr;
+
+    struct cpu_vcpu_tag {
+       uint64_t hid4;
+    } cpu; /* CPU-specific bits */
+
+    uint32_t dec;
+
+    /* XXX etc */
+#ifdef HAS_FLOAT
+#define  NUM_FPRS 32
+    double fprs[NUM_FPRS];
+#endif
+#ifdef HAS_VMX
+    _vector128 vrs[32];
+    _vector128 vscr;
+    uint32_t vrsave;
+#endif
+
+#if 0
+    struct xencomm *xencomm;
+
+    /* I/O-port access bitmap. */
+    u8 *iobmp;        /* Guest kernel virtual address of the bitmap. */
+    int iobmp_limit;  /* Number of ports represented in the bitmap.  */
+    int iopl;         /* Current IOPL for this VCPU. */
+#endif
+
     uint64_t sdr1;                     /* Pagetable base               */
     /* XXX etc */
 };
diff -r c8d1f32fd7de include/xen/interface/domctl.h
--- a/include/xen/interface/domctl.h    Wed Nov 22 14:51:54 2006 -0500
+++ b/include/xen/interface/domctl.h    Wed Dec 13 15:54:20 2006 -0500
@@ -354,6 +354,17 @@ struct xen_domctl_real_mode_area {
 };
 typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
 DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
+#define XEN_DOMCTL_getshadowlist                29
+struct xen_domctl_getshadowlist {
+                /* OUT variables. */
+                /* Start of htab array */
+                uint64_t htab_map;
+                /* Number of ptes within htab */
+                uint htab_num_ptes;
+};
+typedef struct xen_domctl_getshadowlist  xen_domctl_getshadowlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getshadowlist_t);
 
 struct xen_domctl {
     uint32_t cmd;
@@ -381,6 +392,7 @@ struct xen_domctl {
         struct xen_domctl_arch_setup        arch_setup;
         struct xen_domctl_settimeoffset     settimeoffset;
         struct xen_domctl_real_mode_area    real_mode_area;
+        struct xen_domctl_getshadowlist     getshadowlist;
         uint8_t                             pad[128];
     } u;
 };

_______________________________________________
Xen-ppc-devel mailing list
Xen-ppc-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-ppc-devel

<Prev in Thread] Current Thread [Next in Thread>
  • [XenPPC] [PATCH] xm save / restore, poff <=