[Xen-devel] Re: [PATCH 1 of 2] tools/libxc: Remus Checkpoint Compression



Thanks Shriram.

Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

On Sun, 2011-06-19 at 05:32 +0100, Shriram Rajagopalan wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@xxxxxxxxx>
> # Date 1308455519 25200
> # Node ID b4974a38d10199c1e2b8fd3cf36d91c03ac5eeae
> # Parent  23c068b109236657ededa3e3b7f180346a5cd9f9
> tools/libxc: Remus Checkpoint Compression
> 
> Instead of sending dirty pages of guest memory as-is, use a simple compression
> algorithm that sends an RLE-encoded XOR of the page against its last sent copy.
> A small LRU cache is used to hold recently dirtied pages. Pagetable pages are
> sent as-is, as they are canonicalized on the sender side and uncanonicalized at
> the receiver.
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
> 
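For anyone following the protocol change rather than the code, my reading of
the resulting stream layout (a rough sketch, not part of the patch) is:

    First full pass: unchanged wire format. At its end the sender emits
    XC_SAVE_ID_ENABLE_COMPRESSION once, just before the usual zero
    terminator. Every checkpoint after that looks roughly like:

        <batch count + pfn/type list>            (no page data follows)
        ... further batches ...
        <XC_SAVE_ID_COMPRESSED_DATA> <unsigned long len> <len bytes>
        ... further compressed chunks ...
        <tail records>

    The receiver accumulates the compressed chunks and pulls one page per
    pfn out of them while applying the batch (xc_remus_uncompress below).
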
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/Makefile
> --- a/tools/libxc/Makefile      Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/Makefile      Sat Jun 18 20:51:59 2011 -0700
> @@ -42,7 +42,7 @@
>  GUEST_SRCS-y :=
>  GUEST_SRCS-y += xg_private.c xc_suspend.c
>  GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
> -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c
> +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c xc_remus.c
>  GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
> 
>  vpath %.c ../../xen/common/libelf
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_restore.c
> --- a/tools/libxc/xc_domain_restore.c   Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xc_domain_restore.c   Sat Jun 18 20:51:59 2011 -0700
> @@ -43,6 +43,7 @@
>      xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region.  
> */
>      int completed; /* Set when a consistent image is available */
>      int last_checkpoint; /* Set when we should commit to the current 
> checkpoint when it completes. */
> +    int compression; /* Set when sender signals that pages would be sent 
> compressed (for Remus) */
>      struct domain_info_context dinfo;
>  };
> 
> @@ -663,6 +664,10 @@
>      /* pages is of length nr_physpages, pfn_types is of length nr_pages */
>      unsigned int nr_physpages, nr_pages;
> 
> +    /* remus compression state */
> +    int compression;
> +    unsigned long compbuf_pos, compbuf_size;
> +
>      /* Types of the pfns in the current region */
>      unsigned long* pfn_types;
> 
> @@ -700,6 +705,7 @@
>  {
>      int count, countpages, oldcount, i;
>      void* ptmp;
> +    unsigned long compbuf_size;
> 
>      if ( RDEXACT(fd, &count, sizeof(count)) )
>      {
> @@ -809,6 +815,46 @@
>          }
>          return pagebuf_get_one(xch, ctx, buf, fd, dom);
> 
> +    case XC_SAVE_ID_ENABLE_COMPRESSION:
> +        /* We cannot set the compression flag directly in the pagebuf
> +         * structure, since this pagebuf still has uncompressed pages that
> +         * are yet to be applied. We enable the compression field in the
> +         * pagebuf structure after receiving the first tailbuf.
> +         */
> +        ctx->compression = 1;
> +        // DPRINTF("compression flag received");
> +        return pagebuf_get_one(xch, ctx, buf, fd, dom);
> +
> +    case XC_SAVE_ID_COMPRESSED_DATA:
> +
> +        /* read the length of compressed chunk coming in */
> +        if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) )
> +        {
> +            PERROR("Error when reading compbuf_size");
> +            return -1;
> +        }
> +        if (!compbuf_size) return 1;
> +
> +        buf->compbuf_size += compbuf_size;
> +        if (!buf->pages) {
> +            if (!(buf->pages = malloc(buf->compbuf_size))) {
> +                ERROR("Could not allocate compression buffer");
> +                return -1;
> +            }
> +        } else {
> +            if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) {
> +                ERROR("Could not reallocate compression buffer");
> +                return -1;
> +            }
> +            buf->pages = ptmp;
> +        }
> +        if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size),
> +                     compbuf_size) ) {
> +            PERROR("Error when reading compression buffer");
> +            return -1;
> +        }
> +        return compbuf_size;
> +
>      default:
>          if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
>              ERROR("Max batch size exceeded (%d). Giving up.", count);
> @@ -846,6 +892,13 @@
>      if (!countpages)
>          return count;
> 
> +    /* If Remus Checkpoint Compression is turned on, we only receive the
> +     * pfn lists now. The compressed pages will come in later, following a
> +     * <XC_SAVE_ID_COMPRESSED_DATA, compressedChunkSize> tuple.
> +     */
> +    if (buf->compression)
> +        return pagebuf_get_one(xch, ctx, buf, fd, dom);
> +
>      oldcount = buf->nr_physpages;
>      buf->nr_physpages += countpages;
>      if (!buf->pages) {
> @@ -874,6 +927,7 @@
>      int rc;
> 
>      buf->nr_physpages = buf->nr_pages = 0;
> +    buf->compbuf_pos = buf->compbuf_size = 0;
> 
>      do {
>          rc = pagebuf_get_one(xch, ctx, buf, fd, dom);
> @@ -1091,7 +1145,19 @@
>          /* In verify mode, we use a copy; otherwise we work in place */
>          page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
> 
> -        memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, 
> PAGE_SIZE);
> +        /* Remus - page decompression */
> +        if (pagebuf->compression)
> +        {
> +            if (xc_remus_uncompress(xch, pagebuf->pages, 
> pagebuf->compbuf_size,
> +                                    &pagebuf->compbuf_pos, (char *)page))
> +            {
> +                ERROR("Failed to uncompress page (pfn=%lx)\n", pfn);
> +                goto err_mapped;
> +            }
> +        }
> +        else
> +            memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE,
> +                   PAGE_SIZE);
> 
>          pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
> 
> @@ -1353,6 +1419,7 @@
> 
>          if ( !ctx->completed ) {
>              pagebuf.nr_physpages = pagebuf.nr_pages = 0;
> +            pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
>              if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) {
>                  PERROR("Error when reading batch");
>                  goto out;
> @@ -1395,6 +1462,7 @@
>          }
> 
>          pagebuf.nr_physpages = pagebuf.nr_pages = 0;
> +        pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
> 
>          n += j; /* crude stats */
> 
> @@ -1438,6 +1506,13 @@
>           */
>          if ( !ctx->last_checkpoint )
>              fcntl(io_fd, F_SETFL, orig_io_fd_flags | O_NONBLOCK);
> +
> +        /*
> +         * If the sender has sent the enable-compression flag, switch to
> +         * compressed checkpoints mode once the first checkpoint is received.
> +         */
> +        if (ctx->compression)
> +            pagebuf.compression = 1;
>      }
> 
>      if (pagebuf.acpi_ioport_location == 1) {
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_save.c
> --- a/tools/libxc/xc_domain_save.c      Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xc_domain_save.c      Sat Jun 18 20:51:59 2011 -0700
> @@ -269,6 +269,57 @@
>          return noncached_write(xch, ob, fd, buf, len);
>  }
> 
> +static int write_compressed(xc_interface *xch, void *remus_ctx, int dobuf,
> +                            struct outbuf* ob, int fd)
> +{
> +    int rc = 0;
> +    int header = sizeof(int) + sizeof(unsigned long);
> +    int marker = XC_SAVE_ID_COMPRESSED_DATA;
> +    unsigned long compbuf_len = 0;
> +
> +    do
> +    {
> +        /* check for available space (at least 8k) */
> +        if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size)
> +        {
> +            if (outbuf_flush(xch, ob, fd) < 0)
> +            {
> +                ERROR("Error when flushing outbuf intermediate");
> +                return -1;
> +            }
> +        }
> +
> +        xc_remus_compbuf_set(xch, remus_ctx, ob->buf + ob->pos + header,
> +                             ob->size - ob->pos - header);
> +        rc = xc_remus_compress(xch, remus_ctx);
> +        if (!rc)
> +            break;
> +        compbuf_len = xc_remus_get_compbuf_len(xch, remus_ctx);
> +
> +        if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0)
> +        {
> +            PERROR("Error when writing marker (errno %d)", errno);
> +            return -1;
> +        }
> +
> +        if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) 
> < 0)
> +        {
> +            PERROR("Error when writing compbuf_len (errno %d)", errno);
> +            return -1;
> +        }
> +
> +        ob->pos += (size_t) compbuf_len;
> +        if (!dobuf && outbuf_flush(xch, ob, fd) < 0)
> +        {
> +            ERROR("Error when writing compressed chunk");
> +            return -1;
> +        }
> +    } while (rc != 0);
> +
> +    xc_remus_pagebuf_reset(xch, remus_ctx);
> +    return 0;
> +}
> +
>  struct time_stats {
>      struct timeval wall;
>      long long d0_cpu, d1_cpu;
> @@ -866,11 +917,19 @@
> 
>      unsigned long mfn;
> 
> -    struct outbuf ob;
> +    struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL;
>      struct save_ctx _ctx;
>      struct save_ctx *ctx = &_ctx;
>      struct domain_info_context *dinfo = &ctx->dinfo;
> 
> +    /* Remus context */
> +    void *remus_ctx = NULL;
> +    /* Even if XCFLAGS_REMUS_COMPRESS is set, we enable compression only
> +     * after sending XC_SAVE_ID_ENABLE_COMPRESSION and the tailbuf for the
> +     * first time.
> +     */
> +    int compression = 0;
> +
>      int completed = 0;
> 
>      if ( hvm && !callbacks->switch_qemu_logdirty )
> @@ -880,7 +939,7 @@
>          return 1;
>      }
> 
> -    outbuf_init(xch, &ob, OUTBUF_SIZE);
> +    outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE);
> 
>      memset(ctx, 0, sizeof(*ctx));
> 
> @@ -968,6 +1027,16 @@
>          }
>      }
> 
> +    if ( flags & XCFLAGS_REMUS_COMPRESS )
> +    {
> +        if (!(remus_ctx = xc_remus_create_context(xch, dinfo->p2m_size)))
> +        {
> +            ERROR("Failed to create remus context");
> +            goto out;
> +        }
> +        outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4);
> +    }
> +
>      last_iter = !live;
> 
>      /* pretend we sent all the pages last iteration */
> @@ -1076,9 +1145,11 @@
>      }
> 
>    copypages:
> -#define wrexact(fd, buf, len) write_buffer(xch, last_iter, &ob, (fd), (buf), 
> (len))
> -#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, &ob, 
> (fd), (buf), (len))
> +#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), 
> (len))
> +#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, 
> (fd), (buf), (len))
> +#define wrcompressed(fd) write_compressed(xch, remus_ctx, last_iter, ob, 
> (fd))
> 
> +    ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */
>      /* Now write out each data page, canonicalising page tables as we go... 
> */
>      for ( ; ; )
>      {
> @@ -1321,7 +1392,7 @@
>                  {
>                      /* If the page is not a normal data page, write out any
>                         run of pages we may have previously accumulated */
> -                    if ( run )
> +                    if ( !compression && run )
>                      {
>                          if ( wruncached(io_fd, live,
>                                         
> (char*)region_base+(PAGE_SIZE*(j-run)),
> @@ -1356,7 +1427,32 @@
>                          goto out;
>                      }
> 
> -                    if ( wruncached(io_fd, live, page, PAGE_SIZE) != 
> PAGE_SIZE )
> +                    if (compression)
> +                    {
> +                        /* Mark pagetable page to be sent uncompressed */
> +                        if (xc_remus_add_page(xch, remus_ctx, page,
> +                                              pfn, 1 /* raw page */) < 0)
> +                        {
> +                            /*
> +                             * We are out of buffer space to hold dirty
> +                             * pages. Compress and flush the current buffer
> +                             * to make space. This is a corner case that
> +                             * slows down checkpointing, as the compression
> +                             * happens while the domain is suspended. It
> +                             * happens seldom; if you find this occurring
> +                             * frequently, increase PAGE_BUFFER_SIZE in
> +                             * xc_remus.c.
> +                             */
> +                            if (wrcompressed(io_fd) < 0)
> +                            {
> +                                ERROR("Error when writing compressed"
> +                                      " data (4b)\n");
> +                                goto out;
> +                            }
> +                        }
> +                    }
> +                    else if ( wruncached(io_fd, live, page,
> +                                         PAGE_SIZE) != PAGE_SIZE )
>                      {
>                          PERROR("Error when writing to state file (4b)"
>                                " (errno %d)", errno);
> @@ -1366,7 +1462,24 @@
>                  else
>                  {
>                      /* We have a normal page: accumulate it for writing. */
> -                    run++;
> +                    if (compression)
> +                    {
> +                        /* For remus/compression, accumulate the page in the
> +                         * page buffer, to be compressed later.
> +                         */
> +                        if (xc_remus_add_page(xch, remus_ctx, spage,
> +                                              pfn, 0 /* not raw page */) < 0)
> +                        {
> +                            if (wrcompressed(io_fd) < 0)
> +                            {
> +                                ERROR("Error when writing compressed"
> +                                      " data (4c)\n");
> +                                goto out;
> +                            }
> +                        }
> +                    }
> +                    else
> +                        run++;
>                  }
>              } /* end of the write out for this batch */
> 
> @@ -1474,6 +1587,15 @@
> 
>      DPRINTF("All memory is saved\n");
> 
> +    /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
> +     * separate output buffer and flush it after the compressed page chunks.
> +     */
> +    if (compression)
> +    {
> +        ob = &ob_tailbuf;
> +        ob->pos = 0;
> +    }
> +
>      {
>          struct {
>              int id;
> @@ -1573,6 +1695,25 @@
>          }
>      }
> 
> +    /* Enable compression logic on both sides by sending this
> +     * one-time marker.
> +     * NOTE: We could have simplified this procedure by sending
> +     * the enable/disable compression flag before the beginning of
> +     * the main for loop. But that would break compatibility of the
> +     * live migration code with older versions of Xen. So we have
> +     * to enable it after last_iter, when the XC_SAVE_ID_*
> +     * elements are sent.
> +     */
> +    if (!compression && (flags & XCFLAGS_REMUS_COMPRESS))
> +    {
> +        i = XC_SAVE_ID_ENABLE_COMPRESSION;
> +        if ( wrexact(io_fd, &i, sizeof(int)) )
> +        {
> +            PERROR("Error when writing enable_compression marker");
> +            goto out;
> +        }
> +    }
> +
>      /* Zero terminate */
>      i = 0;
>      if ( wrexact(io_fd, &i, sizeof(int)) )
> @@ -1817,14 +1958,38 @@
>      if ( !rc && callbacks->postcopy )
>          callbacks->postcopy(callbacks->data);
> 
> +    /* guest has been resumed. Now we can compress data
> +     * at our own pace.
> +     */
> +    if (!rc && compression)
> +    {
> +        ob = &ob_pagebuf;
> +        if (wrcompressed(io_fd) < 0)
> +        {
> +            ERROR("Error when writing compressed data, after postcopy\n");
> +            rc = 1;
> +            goto out;
> +        }
> +        /* Copy the tailbuf data into the main outbuf */
> +        if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) )
> +        {
> +            rc = 1;
> +            PERROR("Error when copying tailbuf into outbuf");
> +            goto out;
> +        }
> +    }
> +
>      /* Flush last write and discard cache for file. */
> -    if ( outbuf_flush(xch, &ob, io_fd) < 0 ) {
> +    if ( outbuf_flush(xch, ob, io_fd) < 0 ) {
>          PERROR("Error when flushing output buffer");
>          rc = 1;
>      }
> 
>      discard_file_cache(xch, io_fd, 1 /* flush */);
> 
> +    /* Enable compression now, finally */
> +    compression = (flags & XCFLAGS_REMUS_COMPRESS);
> +
>      /* checkpoint_cb can spend arbitrarily long in between rounds */
>      if (!rc && callbacks->checkpoint &&
>          callbacks->checkpoint(callbacks->data) > 0)
> @@ -1866,6 +2031,9 @@
>              DPRINTF("Warning - couldn't disable qemu log-dirty mode");
>      }
> 
> +    if (remus_ctx)
> +        xc_remus_free_context(xch, remus_ctx);
> +
>      if ( live_shinfo )
>          munmap(live_shinfo, PAGE_SIZE);
> 
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_remus.c
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/tools/libxc/xc_remus.c    Sat Jun 18 20:51:59 2011 -0700
> @@ -0,0 +1,465 @@
> +/******************************************************************************
> + * xc_remus.c
> + *
> + * Checkpoint Compression using Page Delta Algorithm.
> + * - An LRU cache of recently dirtied guest pages is maintained.
> + * - For each dirty guest page in the checkpoint, if a previous version of 
> the
> + * page exists in the cache, XOR both pages and send the non-zero sections
> + * to the receiver. The cache is then updated with the newer copy of guest 
> page.
> + * - The receiver will XOR the non-zero sections against its copy of the 
> guest
> + * page, thereby bringing the guest page up-to-date with the sender side.
> + *
> + * Copyright (c) 2011 Shriram Rajagopalan (rshriram@xxxxxxxxx).
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation;
> + * version 2.1 of the License.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 
>  USA
> + *
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include "xenctrl.h"
> +#include "xg_save_restore.h"
> +#include "xg_private.h"
> +
> +/* Already defined in xc_dom.h, but it doesn't have
> + * a conditional include macro. So, redefine it here.
> + */
> +#define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
> +
> +/* Page Cache for Delta Compression */
> +#define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192)
> +
> +struct cache_page;
> +struct cache_page
> +{
> +    char *page;
> +    unsigned long pfn;
> +    struct cache_page *next;
> +    struct cache_page *prev;
> +};
> +
> +/* After XORing the older and newer version, the non-zero sections
> + * are sent as a sequence of tuples <2-byte-offset,4-byte-data> called 
> markers.
> + * - Each page begins with a BEGIN marker (for synchronization).
> + * - If the result of the XOR is a page filled with zeros (i.e. no
> + *   difference between the old and new page), then only the BEGIN marker
> + *   is sent for the page.
> + * - If the two versions of the page differ by more than 50%, the page is 
> sent
> + *   as is, with a FULLPAGE marker, without a BEGIN marker.
> + *
> + * About the choice of data types: typical page size is 4K. Each marker is
> + * 6 bytes long, with a 4-byte data word (1024 data words per page). If 50% 
> of
> + * the page changed, then we would be transmitting ~3000 bytes (worst case).
> + * - If we used an 8-byte data word (10-byte marker), we could end up sending
> + *   ~5000 bytes (>4096).
> + */
> +
> +typedef unsigned int data_t;
> +typedef short int moff_t;
> +
> +#define BEGIN -100
> +#define FULLPAGE -101
> +struct marker
> +{
> +    moff_t off;
> +    data_t val;
> +} __attribute__((packed));
> +
> +static struct marker begin_page = { BEGIN, -1};
> +static struct marker full_page = {FULLPAGE, -1};
> +
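To make the marker format concrete, a small worked example (mine, not part of
the patch): suppose the only word that changed since the cached copy is the
4-byte word at offset 7, with new value 0xdeadbeef. The page is then encoded
as two packed 6-byte markers:

    { .off = BEGIN,    .val = -1 }          /* synchronization marker   */
    { .off = 7,        .val = 0xdeadbeef }  /* one <offset,value> tuple */

i.e. 12 bytes on the wire instead of 4096. With 4-byte words there are 1024
words per page, so at the cut-off used in __compress() (roughly 500 changed
words) a delta costs about 500 * 6 = 3000 bytes, matching the ~3000-byte
worst case mentioned in the comment above; beyond that the page is simply
sent whole, as

    { .off = FULLPAGE, .val = -1 }  followed by the raw 4096 bytes.
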
> +/* Internal page buffer to hold dirty pages of a checkpoint,
> + * to be compressed after the domain is resumed for execution.
> + */
> +#define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192)
> +
> +struct remus_context
> +{
> +    /* compression buffer - holds compressed data */
> +    char *compbuf;
> +    unsigned long compbuf_size;
> +    unsigned long compbuf_pos;
> +
> +    /* Page buffer to hold pages to be compressed */
> +    char *inputbuf;
> +    /* pfns of pages to be compressed */
> +    unsigned long *sendbuf_pfns;
> +    unsigned int pfns_index;
> +    unsigned int pfns_iterator;
> +
> +    /* Compression Cache (LRU) */
> +    char *cache_base;
> +    struct cache_page **pfn2cache;
> +    struct cache_page *cache2pfn;
> +    struct cache_page *page_list_head;
> +    struct cache_page *page_list_tail;
> +};
> +
> +static
> +int __compress(xc_interface *xch, struct remus_context *ctx, char *srcpage,
> +               char *copypage, int israw)
> +{
> +    struct marker *dest = (struct marker *)(ctx->compbuf+ ctx->compbuf_pos);
> +    moff_t off;
> +    int j=0, rc = 0;
> +    data_t *src, *copy;
> +
> +    src = (data_t*)srcpage;
> +    copy = (data_t*)copypage;
> +
> +    if ((ctx->compbuf_pos + sizeof(struct marker)) > ctx->compbuf_size)
> +        return -1;
> +
> +    if (!israw && copypage)
> +    {
> +        dest[j++] = begin_page;
> +        for (off = 0; off < XC_PAGE_SIZE/sizeof(data_t); off++)
> +        {
> +            if (copy[off] != src[off])
> +            {
> +                if ((ctx->compbuf_pos + (j + 1) *
> +                     sizeof(struct marker)) > ctx->compbuf_size)
> +                    return -1;
> +
> +                copy[off] = src[off];
> +                dest[j].off = off;
> +                dest[j].val = src[off];
> +                j++;
> +            }
> +            if (j > 500) /* more than 50% of page changed */
> +                goto FullPage;
> +        }
> +        rc = (j * sizeof(struct marker));
> +    }
> +    else
> +    {
> +    FullPage:
> +        if ( (ctx->compbuf_pos + sizeof(struct marker)
> +              + XC_PAGE_SIZE) > ctx->compbuf_size)
> +            return -1;
> +
> +        dest[0] = full_page;
> +        if (copypage)
> +            memcpy(copypage, srcpage, XC_PAGE_SIZE);
> +        memcpy((char *)&dest[1], srcpage, XC_PAGE_SIZE);
> +        rc = XC_PAGE_SIZE + sizeof(struct marker);
> +    }
> +    ctx->compbuf_pos += rc;
> +
> +    return rc;
> +}
> +
> +static
> +int __uncompress(xc_interface *xch, char *destpage, unsigned long 
> *compbuf_pos,
> +                 char *compbuf, unsigned long compbuf_size)
> +{
> +    struct marker *src = (struct marker *)(compbuf + *compbuf_pos);
> +    int i;
> +    data_t *dest = (data_t *)destpage;
> +
> +    if (*compbuf_pos >= compbuf_size)
> +    {
> +        ERROR("Out of bounds exception: read ptr:%lu, bufsize = %lu\n",
> +              *compbuf_pos, compbuf_size);
> +        return -1;
> +    }
> +
> +    if (src[0].off == BEGIN)
> +    {
> +        *compbuf_pos += sizeof(struct marker);
> +        for (i = 1; (*compbuf_pos < compbuf_size) && (src[i].off >= 0);
> +             i++, *compbuf_pos += sizeof(struct marker))
> +            dest[src[i].off] = src[i].val;
> +    }
> +    else if (src[0].off == FULLPAGE)
> +    {
> +        *compbuf_pos += sizeof(struct marker) + XC_PAGE_SIZE;
> +        memcpy(destpage, (char *)&src[1], XC_PAGE_SIZE);
> +    }
> +    else
> +    {
> +        ERROR("Invalid marker %d in compression buffer at %u\n",
> +              src[0].off, *compbuf_pos);
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +static
> +char *get_cache_page(struct remus_context *ctx, unsigned long pfn,
> +                     int *israw)
> +{
> +    struct cache_page *item = NULL;
> +
> +start:
> +    item = ctx->pfn2cache[pfn];
> +    /* If the requested item is in the cache, move it to the head of the list */
> +    if (item)
> +    {
> +        /* item already at head of list */
> +        if (item == ctx->page_list_head)
> +            goto end;
> +        if (item == ctx->page_list_tail)
> +        {
> +            /* item at tail of list. */
> +            ctx->page_list_tail = item->prev;
> +            (ctx->page_list_tail)->next = NULL;
> +        }
> +        else
> +        {
> +            /* item in middle of list */
> +            item->prev->next = item->next;
> +            item->next->prev = item->prev;
> +        }
> +
> +        item->prev = NULL;
> +        item->next = ctx->page_list_head;
> +        (ctx->page_list_head)->prev = item;
> +        ctx->page_list_head = item;
> +        goto end;
> +    }
> +    else
> +    {
> +        *israw = 1;
> +        /* Add new item to list. If list is full,
> +         * evict a page from tail of list.
> +         */
> +        if ((ctx->page_list_tail)->pfn != INVALID_P2M_ENTRY)
> +            ctx->pfn2cache[(ctx->page_list_tail)->pfn] = NULL;
> +        (ctx->page_list_tail)->pfn = pfn;
> +        ctx->pfn2cache[pfn] = ctx->page_list_tail;
> +
> +        /* Will have same effect as cache hit at tail of list */
> +        goto start;
> +    }
> +end:
> +    return (ctx->page_list_head)->page;
> +}
> +
> +/* Remove pagetable pages from cache and move to tail, as free pages */
> +static
> +void invalidate_cache_page(struct remus_context *ctx, unsigned long pfn)
> +{
> +    struct cache_page *item = NULL;
> +
> +    item = ctx->pfn2cache[pfn];
> +    if (item)
> +    {
> +        /* item at head of list */
> +        if (item == ctx->page_list_head)
> +        {
> +            ctx->page_list_head = (ctx->page_list_head)->next;
> +            (ctx->page_list_head)->prev = NULL;
> +        }
> +        else if (item == ctx->page_list_tail)
> +        {
> +            /* item already at tail of list. */
> +            goto end;
> +        }
> +        else
> +        {
> +            /* item in middle of list */
> +            item->prev->next = item->next;
> +            item->next->prev = item->prev;
> +        }
> +        item->next = NULL;
> +        item->prev = ctx->page_list_tail;
> +        (ctx->page_list_tail)->next = item;
> +        ctx->page_list_tail = item;
> +    end:
> +        ctx->pfn2cache[pfn] = NULL;
> +        (ctx->page_list_tail)->pfn = INVALID_P2M_ENTRY;
> +    }
> +}
> +
> +int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
> +                      unsigned long pfn, int israw)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +
> +    /* pagetable page */
> +    if (israw)
> +        invalidate_cache_page(ctx, pfn);
> +    ctx->sendbuf_pfns[ctx->pfns_index] = israw? INVALID_P2M_ENTRY : pfn;
> +    memcpy(ctx->inputbuf + ctx->pfns_index * XC_PAGE_SIZE, page, 
> XC_PAGE_SIZE);
> +    ctx->pfns_index++;
> +
> +    /* Check if we have run out of space. If so, the caller must
> +     * synchronously compress the buffered pages and flush them out.
> +     */
> +    if (ctx->pfns_index == NRPAGES(PAGE_BUFFER_SIZE))
> +        return -1;
> +    return 0;
> +}
> +
> +int xc_remus_compress(xc_interface *xch, void *remus_ctx)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    char *cache_copy = NULL;
> +    int israw;
> +
> +    if (!ctx->pfns_index || (ctx->pfns_iterator == ctx->pfns_index))
> +        return 0;
> +
> +    for (; ctx->pfns_iterator < ctx->pfns_index; ctx->pfns_iterator++)
> +    {
> +        israw = 0;
> +        cache_copy = NULL;
> +        if (ctx->sendbuf_pfns[ctx->pfns_iterator] == INVALID_P2M_ENTRY)
> +            israw = 1;
> +        else
> +            cache_copy = get_cache_page(ctx, 
> ctx->sendbuf_pfns[ctx->pfns_iterator],
> +                                        &israw);
> +
> +        /* Out of space in outbuf! flush and come back */
> +        if (__compress(xch, ctx, ctx->inputbuf + ctx->pfns_iterator * 
> XC_PAGE_SIZE,
> +                       cache_copy, israw) < 0)
> +            return -1;
> +    }
> +
> +    return 1;
> +}
> +
> +inline
> +unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    return ctx->compbuf_pos;
> +}
> +
> +inline
> +void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx,
> +                          char *compbuf, unsigned long compbuf_size)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    ctx->compbuf_pos = 0;
> +    ctx->compbuf = compbuf;
> +    ctx->compbuf_size = compbuf_size;
> +}
> +
> +inline
> +void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    ctx->pfns_index = ctx->pfns_iterator = 0;
> +}
> +
> +int xc_remus_uncompress(xc_interface *xch, char *compbuf,
> +                        unsigned long compbuf_size,
> +                        unsigned long *compbuf_pos, char *dest)
> +{
> +    return __uncompress(xch, dest, compbuf_pos, compbuf, compbuf_size);
> +}
> +
> +void xc_remus_free_context(xc_interface *xch, void *ctx)
> +{
> +    struct remus_context *remus_ctx = (struct remus_context *)ctx;
> +
> +    if (!remus_ctx) return;
> +
> +    if (remus_ctx->inputbuf)
> +        free(remus_ctx->inputbuf);
> +    if (remus_ctx->sendbuf_pfns)
> +        free(remus_ctx->sendbuf_pfns);
> +    if (remus_ctx->cache_base)
> +        free(remus_ctx->cache_base);
> +    if (remus_ctx->pfn2cache)
> +        free(remus_ctx->pfn2cache);
> +    if (remus_ctx->cache2pfn)
> +        free(remus_ctx->cache2pfn);
> +    free(remus_ctx);
> +}
> +
> +void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size)
> +{
> +    unsigned long i;
> +    struct remus_context *remus_ctx = NULL;
> +    unsigned long num_cache_pages = DELTA_CACHE_SIZE/XC_PAGE_SIZE;
> +
> +    remus_ctx = malloc(sizeof(struct remus_context));
> +    if (!remus_ctx)
> +    {
> +        ERROR("Failed to allocate remus_ctx\n");
> +        goto error;
> +    }
> +    memset(remus_ctx, 0, sizeof(struct remus_context));
> +
> +    if (posix_memalign((void **)&remus_ctx->inputbuf,
> +                       XC_PAGE_SIZE, PAGE_BUFFER_SIZE))
> +    {
> +        ERROR("Failed to allocate page buffer\n");
> +        goto error;
> +    }
> +
> +    remus_ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) *
> +                                     sizeof(unsigned long));
> +    if (!remus_ctx->sendbuf_pfns)
> +    {
> +        ERROR("Could not alloc sendbuf_pfns\n");
> +        goto error;
> +    }
> +    memset(remus_ctx->sendbuf_pfns, -1,
> +           NRPAGES(PAGE_BUFFER_SIZE) * sizeof(unsigned long));
> +
> +    if (posix_memalign((void **)&remus_ctx->cache_base,
> +                       XC_PAGE_SIZE, DELTA_CACHE_SIZE))
> +    {
> +        ERROR("Failed to allocate delta cache\n");
> +        goto error;
> +    }
> +
> +    remus_ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *));
> +    if (!remus_ctx->pfn2cache)
> +    {
> +        ERROR("Could not alloc pfn2cache map\n");
> +        goto error;
> +    }
> +
> +    remus_ctx->cache2pfn = malloc(num_cache_pages * sizeof(struct 
> cache_page));
> +    if (!remus_ctx->cache2pfn)
> +    {
> +        ERROR("Could not alloc cache2pfn map\n");
> +        goto error;
> +    }
> +
> +    for (i = 0; i < num_cache_pages; i++)
> +    {
> +        remus_ctx->cache2pfn[i].pfn = INVALID_P2M_ENTRY;
> +        remus_ctx->cache2pfn[i].page = remus_ctx->cache_base + i * 
> XC_PAGE_SIZE;
> +        remus_ctx->cache2pfn[i].prev = (i == 0)? NULL : 
> &(remus_ctx->cache2pfn[i - 1]);
> +        remus_ctx->cache2pfn[i].next = ((i+1) == num_cache_pages)? NULL :
> +            &(remus_ctx->cache2pfn[i + 1]);
> +    }
> +    remus_ctx->page_list_head = &(remus_ctx->cache2pfn[0]);
> +    remus_ctx->page_list_tail = &(remus_ctx->cache2pfn[num_cache_pages -1]);
> +
> +    return (void *)remus_ctx;
> +error:
> +    xc_remus_free_context(xch, remus_ctx);
> +    return NULL;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-set-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenctrl.h
> --- a/tools/libxc/xenctrl.h     Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xenctrl.h     Sat Jun 18 20:51:59 2011 -0700
> @@ -1820,4 +1820,58 @@
>                          int verbose);
>  /* Useful for callers who also use libelf. */
> 
> +/**
> + * Remus Checkpoint Compression
> + */
> +void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size);
> +void xc_remus_free_context(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Add a page to the remus buffer, to be compressed later.
> + * Returns -1 if there is no space in the buffer.
> + */
> +int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
> +                     unsigned long pfn, int israw);
> +
> +/**
> + * Should be called before compressing the pages. Caller supplies a
> + * compression buffer compbuf of size compbuf_size.
> + */
> +void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx, char *compbuf,
> +                           unsigned long compbuf_size);
> +
> +/**
> + * Delta-compresses the pages in the remus buffer and inserts the
> + * compressed data into the previously supplied compression buffer, compbuf.
> + * After compression, the page is copied to the internal LRU cache.
> + *
> + * This function compresses as many pages as possible into the
> + * supplied compression buffer. It maintains an internal iterator to
> + * keep track of pages in the input buffer that are yet to be compressed.
> + *
> + * returns -1 if the compression buffer has run out of space.
> + * returns 1 on success.
> + * returns 0 if no more pages are left to be compressed.
> + */
> +int xc_remus_compress(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Returns the exact length of the data in the compression buffer.
> + */
> +unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Resets the internal page buffer that holds dirty pages before compression.
> + * Also resets the iterators.
> + */
> +void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Caller must supply the compression buffer (compbuf), its size 
> (compbuf_size) and
> + * a reference to an index variable (compbuf_pos) that is used internally.
> + * Each call pulls out one page from the compressed chunk and copies it to 
> dest.
> + */
> +int xc_remus_uncompress(xc_interface *xch, char *compbuf, unsigned long 
> compbuf_size,
> +                       unsigned long *compbuf_pos, char *dest);
> +
>  #endif /* XENCTRL_H */
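As a summary of the new interface, this is roughly the save-side call
sequence I read out of xc_domain_save.c above (a sketch only; error handling
and the actual wire writes are omitted, and drain_remus_pages, chunk and
chunk_size are hypothetical stand-ins for the outbuf space handled by
write_compressed()):

    /* Sketch, not part of the patch: drain the Remus page buffer.
     * Assumes #include "xenctrl.h" for the xc_remus_* declarations above.
     */
    static int drain_remus_pages(xc_interface *xch, void *rctx,
                                 char *chunk, unsigned long chunk_size)
    {
        int rc;
        unsigned long len;

        /* While the domain is suspended, each dirty page is queued with
         * xc_remus_add_page(); a return of -1 means the internal page
         * buffer is full and must be drained early with the loop below.
         */
        do {
            xc_remus_compbuf_set(xch, rctx, chunk, chunk_size);
            rc = xc_remus_compress(xch, rctx);  /* 1: all compressed,
                                                   -1: chunk filled up,
                                                   0: nothing left */
            if (rc == 0)
                break;
            len = xc_remus_get_compbuf_len(xch, rctx);
            /* ... emit XC_SAVE_ID_COMPRESSED_DATA, then len, then the
             * first len bytes of chunk on the wire ... */
        } while (rc < 0);

        xc_remus_pagebuf_reset(xch, rctx);
        return 0;
    }

On the restore side the accumulated chunks are walked with
xc_remus_uncompress(), one call per pfn in the batch, with compbuf_pos
advancing through the buffer.
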
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenguest.h
> --- a/tools/libxc/xenguest.h    Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xenguest.h    Sat Jun 18 20:51:59 2011 -0700
> @@ -27,6 +27,7 @@
>  #define XCFLAGS_DEBUG     2
>  #define XCFLAGS_HVM       4
>  #define XCFLAGS_STDVGA    8
> +#define XCFLAGS_REMUS_COMPRESS    16
>  #define X86_64_B_SIZE   64
>  #define X86_32_B_SIZE   32
> 
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xg_save_restore.h
> --- a/tools/libxc/xg_save_restore.h     Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xg_save_restore.h     Sat Jun 18 20:51:59 2011 -0700
> @@ -134,6 +134,8 @@
>  #define XC_SAVE_ID_HVM_CONSOLE_PFN    -8 /* (HVM-only) */
>  #define XC_SAVE_ID_LAST_CHECKPOINT    -9 /* Commit to restoring after 
> completion of current iteration. */
>  #define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10
> +#define XC_SAVE_ID_COMPRESSED_DATA    -11 /* Marker to indicate arrival of 
> compressed data */
> +#define XC_SAVE_ID_ENABLE_COMPRESSION -12 /* Marker to enable compression 
> logic at receiver side */
> 
>  /*
>  ** We process save/restore/migrate in batches of pages; the below



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
