[Xen-devel] Re: [PATCH 1 of 2] tools/libxc: Remus Checkpoint Compression



Thanks Shriram.

Acked-by: George Dunlap <george.dunlap@xxxxxxxxxxxxx>

On Sun, 2011-06-19 at 05:32 +0100, Shriram Rajagopalan wrote:
> # HG changeset patch
> # User Shriram Rajagopalan <rshriram@xxxxxxxxx>
> # Date 1308455519 25200
> # Node ID b4974a38d10199c1e2b8fd3cf36d91c03ac5eeae
> # Parent  23c068b109236657ededa3e3b7f180346a5cd9f9
> tools/libxc: Remus Checkpoint Compression
> 
> Instead of sending dirty pages of guest memory as-is, use a simple compression
> algorithm that sends an RLE-encoded XOR of the page against its last sent copy.
> A small LRU cache is used to hold recently dirtied pages. Pagetable pages are
> sent as-is, as they are canonicalized on the sender side and uncanonicalized at
> the receiver.
> 
> Signed-off-by: Shriram Rajagopalan <rshriram@xxxxxxxxx>
> 
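For anyone following the protocol change rather than the code, my reading of
the resulting stream layout (a rough sketch, not part of the patch) is:

    First full pass: unchanged wire format. At its end the sender emits
    XC_SAVE_ID_ENABLE_COMPRESSION once, just before the usual zero
    terminator. Every checkpoint after that looks roughly like:

        <batch count + pfn/type list>            (no page data follows)
        ... further batches ...
        <XC_SAVE_ID_COMPRESSED_DATA> <unsigned long len> <len bytes>
        ... further compressed chunks ...
        <tail records>

    The receiver accumulates the compressed chunks and pulls one page per
    pfn out of them while applying the batch (xc_remus_uncompress below).
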
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/Makefile
> --- a/tools/libxc/Makefile      Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/Makefile      Sat Jun 18 20:51:59 2011 -0700
> @@ -42,7 +42,7 @@
>  GUEST_SRCS-y :=
>  GUEST_SRCS-y += xg_private.c xc_suspend.c
>  GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
> -GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c
> +GUEST_SRCS-$(CONFIG_MIGRATE) += xc_offline_page.c xc_remus.c
>  GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
> 
>  vpath %.c ../../xen/common/libelf
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_restore.c
> --- a/tools/libxc/xc_domain_restore.c   Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xc_domain_restore.c   Sat Jun 18 20:51:59 2011 -0700
> @@ -43,6 +43,7 @@
>      xen_pfn_t *p2m_batch; /* A table of P2M mappings in the current region.  
> */
>      int completed; /* Set when a consistent image is available */
>      int last_checkpoint; /* Set when we should commit to the current 
> checkpoint when it completes. */
> +    int compression; /* Set when sender signals that pages would be sent 
> compressed (for Remus) */
>      struct domain_info_context dinfo;
>  };
> 
> @@ -663,6 +664,10 @@
>      /* pages is of length nr_physpages, pfn_types is of length nr_pages */
>      unsigned int nr_physpages, nr_pages;
> 
> +    /* remus compression state */
> +    int compression;
> +    unsigned long compbuf_pos, compbuf_size;
> +
>      /* Types of the pfns in the current region */
>      unsigned long* pfn_types;
> 
> @@ -700,6 +705,7 @@
>  {
>      int count, countpages, oldcount, i;
>      void* ptmp;
> +    unsigned long compbuf_size;
> 
>      if ( RDEXACT(fd, &count, sizeof(count)) )
>      {
> @@ -809,6 +815,46 @@
>          }
>          return pagebuf_get_one(xch, ctx, buf, fd, dom);
> 
> +    case XC_SAVE_ID_ENABLE_COMPRESSION:
> +        /* We cannot set the compression flag directly in the pagebuf
> +         * structure, since this pagebuf still has uncompressed pages that
> +         * are yet to be applied. We enable the compression field in the
> +         * pagebuf structure after receiving the first tailbuf.
> +         */
> +        ctx->compression = 1;
> +        // DPRINTF("compression flag received");
> +        return pagebuf_get_one(xch, ctx, buf, fd, dom);
> +
> +    case XC_SAVE_ID_COMPRESSED_DATA:
> +
> +        /* read the length of compressed chunk coming in */
> +        if ( RDEXACT(fd, &compbuf_size, sizeof(unsigned long)) )
> +        {
> +            PERROR("Error when reading compbuf_size");
> +            return -1;
> +        }
> +        if (!compbuf_size) return 1;
> +
> +        buf->compbuf_size += compbuf_size;
> +        if (!buf->pages) {
> +            if (!(buf->pages = malloc(buf->compbuf_size))) {
> +                ERROR("Could not allocate compression buffer");
> +                return -1;
> +            }
> +        } else {
> +            if (!(ptmp = realloc(buf->pages, buf->compbuf_size))) {
> +                ERROR("Could not reallocate compression buffer");
> +                return -1;
> +            }
> +            buf->pages = ptmp;
> +        }
> +        if ( RDEXACT(fd, buf->pages + (buf->compbuf_size - compbuf_size),
> +                     compbuf_size) ) {
> +            PERROR("Error when reading compression buffer");
> +            return -1;
> +        }
> +        return compbuf_size;
> +
>      default:
>          if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
>              ERROR("Max batch size exceeded (%d). Giving up.", count);
> @@ -846,6 +892,13 @@
>      if (!countpages)
>          return count;
> 
> +    /* If Remus Checkpoint Compression is turned on, we only receive the
> +     * pfn lists now. The compressed pages will come in later, following a
> +     * <XC_SAVE_ID_COMPRESSED_DATA, compressedChunkSize> tuple.
> +     */
> +    if (buf->compression)
> +        return pagebuf_get_one(xch, ctx, buf, fd, dom);
> +
>      oldcount = buf->nr_physpages;
>      buf->nr_physpages += countpages;
>      if (!buf->pages) {
> @@ -874,6 +927,7 @@
>      int rc;
> 
>      buf->nr_physpages = buf->nr_pages = 0;
> +    buf->compbuf_pos = buf->compbuf_size = 0;
> 
>      do {
>          rc = pagebuf_get_one(xch, ctx, buf, fd, dom);
> @@ -1091,7 +1145,19 @@
>          /* In verify mode, we use a copy; otherwise we work in place */
>          page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
> 
> -        memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, 
> PAGE_SIZE);
> +        /* Remus - page decompression */
> +        if (pagebuf->compression)
> +        {
> +            if (xc_remus_uncompress(xch, pagebuf->pages, 
> pagebuf->compbuf_size,
> +                                    &pagebuf->compbuf_pos, (char *)page))
> +            {
> +                ERROR("Failed to uncompress page (pfn=%lx)\n", pfn);
> +                goto err_mapped;
> +            }
> +        }
> +        else
> +            memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE,
> +                   PAGE_SIZE);
> 
>          pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
> 
> @@ -1353,6 +1419,7 @@
> 
>          if ( !ctx->completed ) {
>              pagebuf.nr_physpages = pagebuf.nr_pages = 0;
> +            pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
>              if ( pagebuf_get_one(xch, ctx, &pagebuf, io_fd, dom) < 0 ) {
>                  PERROR("Error when reading batch");
>                  goto out;
> @@ -1395,6 +1462,7 @@
>          }
> 
>          pagebuf.nr_physpages = pagebuf.nr_pages = 0;
> +        pagebuf.compbuf_pos = pagebuf.compbuf_size = 0;
> 
>          n += j; /* crude stats */
> 
> @@ -1438,6 +1506,13 @@
>           */
>          if ( !ctx->last_checkpoint )
>              fcntl(io_fd, F_SETFL, orig_io_fd_flags | O_NONBLOCK);
> +
> +        /*
> +         * If the sender has sent the enable-compression flag, switch to
> +         * compressed checkpoints mode once the first checkpoint is received.
> +         */
> +        if (ctx->compression)
> +            pagebuf.compression = 1;
>      }
> 
>      if (pagebuf.acpi_ioport_location == 1) {
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_domain_save.c
> --- a/tools/libxc/xc_domain_save.c      Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xc_domain_save.c      Sat Jun 18 20:51:59 2011 -0700
> @@ -269,6 +269,57 @@
>          return noncached_write(xch, ob, fd, buf, len);
>  }
> 
> +static int write_compressed(xc_interface *xch, void *remus_ctx, int dobuf,
> +                            struct outbuf* ob, int fd)
> +{
> +    int rc = 0;
> +    int header = sizeof(int) + sizeof(unsigned long);
> +    int marker = XC_SAVE_ID_COMPRESSED_DATA;
> +    unsigned long compbuf_len = 0;
> +
> +    do
> +    {
> +        /* check for available space (at least 8k) */
> +        if ((ob->pos + header + XC_PAGE_SIZE * 2) > ob->size)
> +        {
> +            if (outbuf_flush(xch, ob, fd) < 0)
> +            {
> +                ERROR("Error when flushing outbuf intermediate");
> +                return -1;
> +            }
> +        }
> +
> +        xc_remus_compbuf_set(xch, remus_ctx, ob->buf + ob->pos + header,
> +                             ob->size - ob->pos - header);
> +        rc = xc_remus_compress(xch, remus_ctx);
> +        if (!rc)
> +            break;
> +        compbuf_len = xc_remus_get_compbuf_len(xch, remus_ctx);
> +
> +        if (outbuf_hardwrite(xch, ob, fd, &marker, sizeof(marker)) < 0)
> +        {
> +            PERROR("Error when writing marker (errno %d)", errno);
> +            return -1;
> +        }
> +
> +        if (outbuf_hardwrite(xch, ob, fd, &compbuf_len, sizeof(compbuf_len)) 
> < 0)
> +        {
> +            PERROR("Error when writing compbuf_len (errno %d)", errno);
> +            return -1;
> +        }
> +
> +        ob->pos += (size_t) compbuf_len;
> +        if (!dobuf && outbuf_flush(xch, ob, fd) < 0)
> +        {
> +            ERROR("Error when writing compressed chunk");
> +            return -1;
> +        }
> +    } while (rc != 0);
> +
> +    xc_remus_pagebuf_reset(xch, remus_ctx);
> +    return 0;
> +}
> +
>  struct time_stats {
>      struct timeval wall;
>      long long d0_cpu, d1_cpu;
> @@ -866,11 +917,19 @@
> 
>      unsigned long mfn;
> 
> -    struct outbuf ob;
> +    struct outbuf ob_pagebuf, ob_tailbuf, *ob = NULL;
>      struct save_ctx _ctx;
>      struct save_ctx *ctx = &_ctx;
>      struct domain_info_context *dinfo = &ctx->dinfo;
> 
> +    /* Remus context */
> +    void *remus_ctx = NULL;
> +    /* Even if XCFLAGS_REMUS_COMPRESS is set, we enable compression only
> +     * after sending XC_SAVE_ID_ENABLE_COMPRESSION and the tailbuf for the
> +     * first time.
> +     */
> +    int compression = 0;
> +
>      int completed = 0;
> 
>      if ( hvm && !callbacks->switch_qemu_logdirty )
> @@ -880,7 +939,7 @@
>          return 1;
>      }
> 
> -    outbuf_init(xch, &ob, OUTBUF_SIZE);
> +    outbuf_init(xch, &ob_pagebuf, OUTBUF_SIZE);
> 
>      memset(ctx, 0, sizeof(*ctx));
> 
> @@ -968,6 +1027,16 @@
>          }
>      }
> 
> +    if ( flags & XCFLAGS_REMUS_COMPRESS )
> +    {
> +        if (!(remus_ctx = xc_remus_create_context(xch, dinfo->p2m_size)))
> +        {
> +            ERROR("Failed to create remus context");
> +            goto out;
> +        }
> +        outbuf_init(xch, &ob_tailbuf, OUTBUF_SIZE/4);
> +    }
> +
>      last_iter = !live;
> 
>      /* pretend we sent all the pages last iteration */
> @@ -1076,9 +1145,11 @@
>      }
> 
>    copypages:
> -#define wrexact(fd, buf, len) write_buffer(xch, last_iter, &ob, (fd), (buf), 
> (len))
> -#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, &ob, 
> (fd), (buf), (len))
> +#define wrexact(fd, buf, len) write_buffer(xch, last_iter, ob, (fd), (buf), 
> (len))
> +#define wruncached(fd, live, buf, len) write_uncached(xch, last_iter, ob, 
> (fd), (buf), (len))
> +#define wrcompressed(fd) write_compressed(xch, remus_ctx, last_iter, ob, 
> (fd))
> 
> +    ob = &ob_pagebuf; /* Holds pfn_types, pages/compressed pages */
>      /* Now write out each data page, canonicalising page tables as we go... 
> */
>      for ( ; ; )
>      {
> @@ -1321,7 +1392,7 @@
>                  {
>                      /* If the page is not a normal data page, write out any
>                         run of pages we may have previously accumulated */
> -                    if ( run )
> +                    if ( !compression && run )
>                      {
>                          if ( wruncached(io_fd, live,
>                                         
> (char*)region_base+(PAGE_SIZE*(j-run)),
> @@ -1356,7 +1427,32 @@
>                          goto out;
>                      }
> 
> -                    if ( wruncached(io_fd, live, page, PAGE_SIZE) != 
> PAGE_SIZE )
> +                    if (compression)
> +                    {
> +                        /* Mark pagetable page to be sent uncompressed */
> +                        if (xc_remus_add_page(xch, remus_ctx, page,
> +                                              pfn, 1 /* raw page */) < 0)
> +                        {
> +                            /*
> +                             * We are out of buffer space to hold dirty
> +                             * pages. Compress and flush the current buffer
> +                             * to make space. This is a corner case that
> +                             * slows down checkpointing, as the compression
> +                             * happens while the domain is suspended. It
> +                             * happens seldom; if you find this occurring
> +                             * frequently, increase PAGE_BUFFER_SIZE in
> +                             * xc_remus.c.
> +                             */
> +                            if (wrcompressed(io_fd) < 0)
> +                            {
> +                                ERROR("Error when writing compressed"
> +                                      " data (4b)\n");
> +                                goto out;
> +                            }
> +                        }
> +                    }
> +                    else if ( wruncached(io_fd, live, page,
> +                                         PAGE_SIZE) != PAGE_SIZE )
>                      {
>                          PERROR("Error when writing to state file (4b)"
>                                " (errno %d)", errno);
> @@ -1366,7 +1462,24 @@
>                  else
>                  {
>                      /* We have a normal page: accumulate it for writing. */
> -                    run++;
> +                    if (compression)
> +                    {
> +                        /* For remus/compression, accumulate the page in the
> +                         * page buffer, to be compressed later.
> +                         */
> +                        if (xc_remus_add_page(xch, remus_ctx, spage,
> +                                              pfn, 0 /* not raw page */) < 0)
> +                        {
> +                            if (wrcompressed(io_fd) < 0)
> +                            {
> +                                ERROR("Error when writing compressed"
> +                                      " data (4c)\n");
> +                                goto out;
> +                            }
> +                        }
> +                    }
> +                    else
> +                        run++;
>                  }
>              } /* end of the write out for this batch */
> 
> @@ -1474,6 +1587,15 @@
> 
>      DPRINTF("All memory is saved\n");
> 
> +    /* After last_iter, buffer the rest of pagebuf & tailbuf data into a
> +     * separate output buffer and flush it after the compressed page chunks.
> +     */
> +    if (compression)
> +    {
> +        ob = &ob_tailbuf;
> +        ob->pos = 0;
> +    }
> +
>      {
>          struct {
>              int id;
> @@ -1573,6 +1695,25 @@
>          }
>      }
> 
> +    /* Enable compression logic on both sides by sending this
> +     * one-time marker.
> +     * NOTE: We could have simplified this procedure by sending
> +     * the enable/disable compression flag before the beginning of
> +     * the main for loop. But that would break compatibility of the
> +     * live migration code with older versions of Xen. So we have
> +     * to enable it after last_iter, when the XC_SAVE_ID_*
> +     * elements are sent.
> +     */
> +    if (!compression && (flags & XCFLAGS_REMUS_COMPRESS))
> +    {
> +        i = XC_SAVE_ID_ENABLE_COMPRESSION;
> +        if ( wrexact(io_fd, &i, sizeof(int)) )
> +        {
> +            PERROR("Error when writing enable_compression marker");
> +            goto out;
> +        }
> +    }
> +
>      /* Zero terminate */
>      i = 0;
>      if ( wrexact(io_fd, &i, sizeof(int)) )
> @@ -1817,14 +1958,38 @@
>      if ( !rc && callbacks->postcopy )
>          callbacks->postcopy(callbacks->data);
> 
> +    /* guest has been resumed. Now we can compress data
> +     * at our own pace.
> +     */
> +    if (!rc && compression)
> +    {
> +        ob = &ob_pagebuf;
> +        if (wrcompressed(io_fd) < 0)
> +        {
> +            ERROR("Error when writing compressed data, after postcopy\n");
> +            rc = 1;
> +            goto out;
> +        }
> +        /* Copy the tailbuf data into the main outbuf */
> +        if ( wrexact(io_fd, ob_tailbuf.buf, ob_tailbuf.pos) )
> +        {
> +            rc = 1;
> +            PERROR("Error when copying tailbuf into outbuf");
> +            goto out;
> +        }
> +    }
> +
>      /* Flush last write and discard cache for file. */
> -    if ( outbuf_flush(xch, &ob, io_fd) < 0 ) {
> +    if ( outbuf_flush(xch, ob, io_fd) < 0 ) {
>          PERROR("Error when flushing output buffer");
>          rc = 1;
>      }
> 
>      discard_file_cache(xch, io_fd, 1 /* flush */);
> 
> +    /* Enable compression now, finally */
> +    compression = (flags & XCFLAGS_REMUS_COMPRESS);
> +
>      /* checkpoint_cb can spend arbitrarily long in between rounds */
>      if (!rc && callbacks->checkpoint &&
>          callbacks->checkpoint(callbacks->data) > 0)
> @@ -1866,6 +2031,9 @@
>              DPRINTF("Warning - couldn't disable qemu log-dirty mode");
>      }
> 
> +    if (remus_ctx)
> +        xc_remus_free_context(xch, remus_ctx);
> +
>      if ( live_shinfo )
>          munmap(live_shinfo, PAGE_SIZE);
> 
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xc_remus.c
> --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
> +++ b/tools/libxc/xc_remus.c    Sat Jun 18 20:51:59 2011 -0700
> @@ -0,0 +1,465 @@
> +/******************************************************************************
> + * xc_remus.c
> + *
> + * Checkpoint Compression using Page Delta Algorithm.
> + * - An LRU cache of recently dirtied guest pages is maintained.
> + * - For each dirty guest page in the checkpoint, if a previous version of 
> the
> + * page exists in the cache, XOR both pages and send the non-zero sections
> + * to the receiver. The cache is then updated with the newer copy of guest 
> page.
> + * - The receiver will XOR the non-zero sections against its copy of the 
> guest
> + * page, thereby bringing the guest page up-to-date with the sender side.
> + *
> + * Copyright (c) 2011 Shriram Rajagopalan (rshriram@xxxxxxxxx).
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation;
> + * version 2.1 of the License.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 
>  USA
> + *
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <unistd.h>
> +#include <sys/types.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include "xenctrl.h"
> +#include "xg_save_restore.h"
> +#include "xg_private.h"
> +
> +/* Already defined in xc_dom.h, but it doesn't have
> + * a conditional include macro. So, redefine it here.
> + */
> +#define INVALID_P2M_ENTRY   ((xen_pfn_t)-1)
> +
> +/* Page Cache for Delta Compression */
> +#define DELTA_CACHE_SIZE (XC_PAGE_SIZE * 8192)
> +
> +struct cache_page;
> +struct cache_page
> +{
> +    char *page;
> +    unsigned long pfn;
> +    struct cache_page *next;
> +    struct cache_page *prev;
> +};
> +
> +/* After XORing the older and newer version, the non-zero sections
> + * are sent as a sequence of tuples <2-byte-offset,4-byte-data> called 
> markers.
> + * - Each page begins with a BEGIN marker (for synchronization).
> + * - If the result of the XOR is a page filled with zeros (i.e. no
> + *   difference between the old and new page), then only the BEGIN marker
> + *   is sent for the page.
> + * - If the two versions of the page differ by more than 50%, the page is 
> sent
> + *   as is, with a FULLPAGE marker, without a BEGIN marker.
> + *
> + * About the choice of data types: typical page size is 4K. Each marker is
> + * 6 bytes long, with a 4-byte data word (1024 data words per page). If 50% 
> of
> + * the page changed, then we would be transmitting ~3000 bytes (worst case).
> + * - If we used an 8-byte data word (10-byte marker), we could end up sending
> + *   ~5000 bytes (>4096).
> + */
> +
> +typedef unsigned int data_t;
> +typedef short int moff_t;
> +
> +#define BEGIN -100
> +#define FULLPAGE -101
> +struct marker
> +{
> +    moff_t off;
> +    data_t val;
> +} __attribute__((packed));
> +
> +static struct marker begin_page = { BEGIN, -1};
> +static struct marker full_page = {FULLPAGE, -1};
> +
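To make the marker format concrete, a small worked example (mine, not part of
the patch): suppose the only word that changed since the cached copy is the
4-byte word at offset 7, with new value 0xdeadbeef. The page is then encoded
as two packed 6-byte markers:

    { .off = BEGIN,    .val = -1 }          /* synchronization marker   */
    { .off = 7,        .val = 0xdeadbeef }  /* one <offset,value> tuple */

i.e. 12 bytes on the wire instead of 4096. With 4-byte words there are 1024
words per page, so at the cut-off used in __compress() (roughly 500 changed
words) a delta costs about 500 * 6 = 3000 bytes, matching the ~3000-byte
worst case mentioned in the comment above; beyond that the page is simply
sent whole, as

    { .off = FULLPAGE, .val = -1 }  followed by the raw 4096 bytes.
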
> +/* Internal page buffer to hold dirty pages of a checkpoint,
> + * to be compressed after the domain is resumed for execution.
> + */
> +#define PAGE_BUFFER_SIZE (XC_PAGE_SIZE * 8192)
> +
> +struct remus_context
> +{
> +    /* compression buffer - holds compressed data */
> +    char *compbuf;
> +    unsigned long compbuf_size;
> +    unsigned long compbuf_pos;
> +
> +    /* Page buffer to hold pages to be compressed */
> +    char *inputbuf;
> +    /* pfns of pages to be compressed */
> +    unsigned long *sendbuf_pfns;
> +    unsigned int pfns_index;
> +    unsigned int pfns_iterator;
> +
> +    /* Compression Cache (LRU) */
> +    char *cache_base;
> +    struct cache_page **pfn2cache;
> +    struct cache_page *cache2pfn;
> +    struct cache_page *page_list_head;
> +    struct cache_page *page_list_tail;
> +};
> +
> +static
> +int __compress(xc_interface *xch, struct remus_context *ctx, char *srcpage,
> +               char *copypage, int israw)
> +{
> +    struct marker *dest = (struct marker *)(ctx->compbuf+ ctx->compbuf_pos);
> +    moff_t off;
> +    int j=0, rc = 0;
> +    data_t *src, *copy;
> +
> +    src = (data_t*)srcpage;
> +    copy = (data_t*)copypage;
> +
> +    if ((ctx->compbuf_pos + sizeof(struct marker)) > ctx->compbuf_size)
> +        return -1;
> +
> +    if (!israw && copypage)
> +    {
> +        dest[j++] = begin_page;
> +        for (off = 0; off < XC_PAGE_SIZE/sizeof(data_t); off++)
> +        {
> +            if (copy[off] != src[off])
> +            {
> +                if ((ctx->compbuf_pos + (j + 1) *
> +                     sizeof(struct marker)) > ctx->compbuf_size)
> +                    return -1;
> +
> +                copy[off] = src[off];
> +                dest[j].off = off;
> +                dest[j].val = src[off];
> +                j++;
> +            }
> +            if (j > 500) /* more than 50% of page changed */
> +                goto FullPage;
> +        }
> +        rc = (j * sizeof(struct marker));
> +    }
> +    else
> +    {
> +    FullPage:
> +        if ( (ctx->compbuf_pos + sizeof(struct marker)
> +              + XC_PAGE_SIZE) > ctx->compbuf_size)
> +            return -1;
> +
> +        dest[0] = full_page;
> +        if (copypage)
> +            memcpy(copypage, srcpage, XC_PAGE_SIZE);
> +        memcpy((char *)&dest[1], srcpage, XC_PAGE_SIZE);
> +        rc = XC_PAGE_SIZE + sizeof(struct marker);
> +    }
> +    ctx->compbuf_pos += rc;
> +
> +    return rc;
> +}
> +
> +static
> +int __uncompress(xc_interface *xch, char *destpage, unsigned long 
> *compbuf_pos,
> +                 char *compbuf, unsigned long compbuf_size)
> +{
> +    struct marker *src = (struct marker *)(compbuf + *compbuf_pos);
> +    int i;
> +    data_t *dest = (data_t *)destpage;
> +
> +    if (*compbuf_pos >= compbuf_size)
> +    {
> +        ERROR("Out of bounds exception: read ptr:%lu, bufsize = %lu\n",
> +              *compbuf_pos, compbuf_size);
> +        return -1;
> +    }
> +
> +    if (src[0].off == BEGIN)
> +    {
> +        *compbuf_pos += sizeof(struct marker);
> +        for (i = 1; (*compbuf_pos < compbuf_size) && (src[i].off >= 0);
> +             i++, *compbuf_pos += sizeof(struct marker))
> +            dest[src[i].off] = src[i].val;
> +    }
> +    else if (src[0].off == FULLPAGE)
> +    {
> +        *compbuf_pos += sizeof(struct marker) + XC_PAGE_SIZE;
> +        memcpy(destpage, (char *)&src[1], XC_PAGE_SIZE);
> +    }
> +    else
> +    {
> +        ERROR("Invalid marker %d in compression buffer at %u\n",
> +              src[0].off, *compbuf_pos);
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +static
> +char *get_cache_page(struct remus_context *ctx, unsigned long pfn,
> +                     int *israw)
> +{
> +    struct cache_page *item = NULL;
> +
> +start:
> +    item = ctx->pfn2cache[pfn];
> +    /* If the requested item is in the cache, move it to the head of the list */
> +    if (item)
> +    {
> +        /* item already at head of list */
> +        if (item == ctx->page_list_head)
> +            goto end;
> +        if (item == ctx->page_list_tail)
> +        {
> +            /* item at tail of list. */
> +            ctx->page_list_tail = item->prev;
> +            (ctx->page_list_tail)->next = NULL;
> +        }
> +        else
> +        {
> +            /* item in middle of list */
> +            item->prev->next = item->next;
> +            item->next->prev = item->prev;
> +        }
> +
> +        item->prev = NULL;
> +        item->next = ctx->page_list_head;
> +        (ctx->page_list_head)->prev = item;
> +        ctx->page_list_head = item;
> +        goto end;
> +    }
> +    else
> +    {
> +        *israw = 1;
> +        /* Add new item to list. If list is full,
> +         * evict a page from tail of list.
> +         */
> +        if ((ctx->page_list_tail)->pfn != INVALID_P2M_ENTRY)
> +            ctx->pfn2cache[(ctx->page_list_tail)->pfn] = NULL;
> +        (ctx->page_list_tail)->pfn = pfn;
> +        ctx->pfn2cache[pfn] = ctx->page_list_tail;
> +
> +        /* Will have same effect as cache hit at tail of list */
> +        goto start;
> +    }
> +end:
> +    return (ctx->page_list_head)->page;
> +}
> +
> +/* Remove pagetable pages from cache and move to tail, as free pages */
> +static
> +void invalidate_cache_page(struct remus_context *ctx, unsigned long pfn)
> +{
> +    struct cache_page *item = NULL;
> +
> +    item = ctx->pfn2cache[pfn];
> +    if (item)
> +    {
> +        /* item at head of list */
> +        if (item == ctx->page_list_head)
> +        {
> +            ctx->page_list_head = (ctx->page_list_head)->next;
> +            (ctx->page_list_head)->prev = NULL;
> +        }
> +        else if (item == ctx->page_list_tail)
> +        {
> +            /* item already at tail of list. */
> +            goto end;
> +        }
> +        else
> +        {
> +            /* item in middle of list */
> +            item->prev->next = item->next;
> +            item->next->prev = item->prev;
> +        }
> +        item->next = NULL;
> +        item->prev = ctx->page_list_tail;
> +        (ctx->page_list_tail)->next = item;
> +        ctx->page_list_tail = item;
> +    end:
> +        ctx->pfn2cache[pfn] = NULL;
> +        (ctx->page_list_tail)->pfn = INVALID_P2M_ENTRY;
> +    }
> +}
> +
> +int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
> +                      unsigned long pfn, int israw)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +
> +    /* pagetable page */
> +    if (israw)
> +        invalidate_cache_page(ctx, pfn);
> +    ctx->sendbuf_pfns[ctx->pfns_index] = israw? INVALID_P2M_ENTRY : pfn;
> +    memcpy(ctx->inputbuf + ctx->pfns_index * XC_PAGE_SIZE, page, 
> XC_PAGE_SIZE);
> +    ctx->pfns_index++;
> +
> +    /* Check if we have run out of space. If so, the caller must
> +     * synchronously compress the buffered pages and flush them out.
> +     */
> +    if (ctx->pfns_index == NRPAGES(PAGE_BUFFER_SIZE))
> +        return -1;
> +    return 0;
> +}
> +
> +int xc_remus_compress(xc_interface *xch, void *remus_ctx)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    char *cache_copy = NULL;
> +    int israw;
> +
> +    if (!ctx->pfns_index || (ctx->pfns_iterator == ctx->pfns_index))
> +        return 0;
> +
> +    for (; ctx->pfns_iterator < ctx->pfns_index; ctx->pfns_iterator++)
> +    {
> +        israw = 0;
> +        cache_copy = NULL;
> +        if (ctx->sendbuf_pfns[ctx->pfns_iterator] == INVALID_P2M_ENTRY)
> +            israw = 1;
> +        else
> +            cache_copy = get_cache_page(ctx, 
> ctx->sendbuf_pfns[ctx->pfns_iterator],
> +                                        &israw);
> +
> +        /* Out of space in outbuf! flush and come back */
> +        if (__compress(xch, ctx, ctx->inputbuf + ctx->pfns_iterator * 
> XC_PAGE_SIZE,
> +                       cache_copy, israw) < 0)
> +            return -1;
> +    }
> +
> +    return 1;
> +}
> +
> +inline
> +unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    return ctx->compbuf_pos;
> +}
> +
> +inline
> +void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx,
> +                          char *compbuf, unsigned long compbuf_size)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    ctx->compbuf_pos = 0;
> +    ctx->compbuf = compbuf;
> +    ctx->compbuf_size = compbuf_size;
> +}
> +
> +inline
> +void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx)
> +{
> +    struct remus_context *ctx = (struct remus_context *)remus_ctx;
> +    ctx->pfns_index = ctx->pfns_iterator = 0;
> +}
> +
> +int xc_remus_uncompress(xc_interface *xch, char *compbuf,
> +                        unsigned long compbuf_size,
> +                        unsigned long *compbuf_pos, char *dest)
> +{
> +    return __uncompress(xch, dest, compbuf_pos, compbuf, compbuf_size);
> +}
> +
> +void xc_remus_free_context(xc_interface *xch, void *ctx)
> +{
> +    struct remus_context *remus_ctx = (struct remus_context *)ctx;
> +
> +    if (!remus_ctx) return;
> +
> +    if (remus_ctx->inputbuf)
> +        free(remus_ctx->inputbuf);
> +    if (remus_ctx->sendbuf_pfns)
> +        free(remus_ctx->sendbuf_pfns);
> +    if (remus_ctx->cache_base)
> +        free(remus_ctx->cache_base);
> +    if (remus_ctx->pfn2cache)
> +        free(remus_ctx->pfn2cache);
> +    if (remus_ctx->cache2pfn)
> +        free(remus_ctx->cache2pfn);
> +    free(remus_ctx);
> +}
> +
> +void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size)
> +{
> +    unsigned long i;
> +    struct remus_context *remus_ctx = NULL;
> +    unsigned long num_cache_pages = DELTA_CACHE_SIZE/XC_PAGE_SIZE;
> +
> +    remus_ctx = malloc(sizeof(struct remus_context));
> +    if (!remus_ctx)
> +    {
> +        ERROR("Failed to allocate remus_ctx\n");
> +        goto error;
> +    }
> +    memset(remus_ctx, 0, sizeof(struct remus_context));
> +
> +    if (posix_memalign((void **)&remus_ctx->inputbuf,
> +                       XC_PAGE_SIZE, PAGE_BUFFER_SIZE))
> +    {
> +        ERROR("Failed to allocate page buffer\n");
> +        goto error;
> +    }
> +
> +    remus_ctx->sendbuf_pfns = malloc(NRPAGES(PAGE_BUFFER_SIZE) *
> +                                     sizeof(unsigned long));
> +    if (!remus_ctx->sendbuf_pfns)
> +    {
> +        ERROR("Could not alloc sendbuf_pfns\n");
> +        goto error;
> +    }
> +    memset(remus_ctx->sendbuf_pfns, -1,
> +           NRPAGES(PAGE_BUFFER_SIZE) * sizeof(unsigned long));
> +
> +    if (posix_memalign((void **)&remus_ctx->cache_base,
> +                       XC_PAGE_SIZE, DELTA_CACHE_SIZE))
> +    {
> +        ERROR("Failed to allocate delta cache\n");
> +        goto error;
> +    }
> +
> +    remus_ctx->pfn2cache = calloc(p2m_size, sizeof(struct cache_page *));
> +    if (!remus_ctx->pfn2cache)
> +    {
> +        ERROR("Could not alloc pfn2cache map\n");
> +        goto error;
> +    }
> +
> +    remus_ctx->cache2pfn = malloc(num_cache_pages * sizeof(struct 
> cache_page));
> +    if (!remus_ctx->cache2pfn)
> +    {
> +        ERROR("Could not alloc cache2pfn map\n");
> +        goto error;
> +    }
> +
> +    for (i = 0; i < num_cache_pages; i++)
> +    {
> +        remus_ctx->cache2pfn[i].pfn = INVALID_P2M_ENTRY;
> +        remus_ctx->cache2pfn[i].page = remus_ctx->cache_base + i * 
> XC_PAGE_SIZE;
> +        remus_ctx->cache2pfn[i].prev = (i == 0)? NULL : 
> &(remus_ctx->cache2pfn[i - 1]);
> +        remus_ctx->cache2pfn[i].next = ((i+1) == num_cache_pages)? NULL :
> +            &(remus_ctx->cache2pfn[i + 1]);
> +    }
> +    remus_ctx->page_list_head = &(remus_ctx->cache2pfn[0]);
> +    remus_ctx->page_list_tail = &(remus_ctx->cache2pfn[num_cache_pages -1]);
> +
> +    return (void *)remus_ctx;
> +error:
> +    xc_remus_free_context(xch, remus_ctx);
> +    return NULL;
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-set-style: "BSD"
> + * c-basic-offset: 4
> + * tab-width: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenctrl.h
> --- a/tools/libxc/xenctrl.h     Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xenctrl.h     Sat Jun 18 20:51:59 2011 -0700
> @@ -1820,4 +1820,58 @@
>                          int verbose);
>  /* Useful for callers who also use libelf. */
> 
> +/**
> + * Remus Checkpoint Compression
> + */
> +void *xc_remus_create_context(xc_interface *xch, unsigned long p2m_size);
> +void xc_remus_free_context(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Add a page to the remus buffer, to be compressed later.
> + * Returns -1 if there is no space in the buffer.
> + */
> +int xc_remus_add_page(xc_interface *xch, void *remus_ctx, char *page,
> +                     unsigned long pfn, int israw);
> +
> +/**
> + * Should be called before compressing the pages. Caller supplies a
> + * compression buffer compbuf of size compbuf_size.
> + */
> +void xc_remus_compbuf_set(xc_interface *xch, void *remus_ctx, char *compbuf,
> +                           unsigned long compbuf_size);
> +
> +/**
> + * Delta-compresses the pages in the remus buffer and inserts the
> + * compressed data into the previously supplied compression buffer, compbuf.
> + * After compression, the page is copied to the internal LRU cache.
> + *
> + * This function compresses as many pages as possible into the
> + * supplied compression buffer. It maintains an internal iterator to
> + * keep track of pages in the input buffer that are yet to be compressed.
> + *
> + * returns -1 if the compression buffer has run out of space.
> + * returns 1 on success.
> + * returns 0 if no more pages are left to be compressed.
> + */
> +int xc_remus_compress(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Returns the exact length of the data in the compression buffer.
> + */
> +unsigned long xc_remus_get_compbuf_len(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Resets the internal page buffer that holds dirty pages before compression.
> + * Also resets the iterators.
> + */
> +void xc_remus_pagebuf_reset(xc_interface *xch, void *remus_ctx);
> +
> +/**
> + * Caller must supply the compression buffer (compbuf), its size 
> (compbuf_size) and
> + * a reference to an index variable (compbuf_pos) that is used internally.
> + * Each call pulls out one page from the compressed chunk and copies it to 
> dest.
> + */
> +int xc_remus_uncompress(xc_interface *xch, char *compbuf, unsigned long 
> compbuf_size,
> +                       unsigned long *compbuf_pos, char *dest);
> +
>  #endif /* XENCTRL_H */
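As a summary of the new interface, this is roughly the save-side call
sequence I read out of xc_domain_save.c above (a sketch only; error handling
and the actual wire writes are omitted, and drain_remus_pages, chunk and
chunk_size are hypothetical stand-ins for the outbuf space handled by
write_compressed()):

    /* Sketch, not part of the patch: drain the Remus page buffer.
     * Assumes #include "xenctrl.h" for the xc_remus_* declarations above.
     */
    static int drain_remus_pages(xc_interface *xch, void *rctx,
                                 char *chunk, unsigned long chunk_size)
    {
        int rc;
        unsigned long len;

        /* While the domain is suspended, each dirty page is queued with
         * xc_remus_add_page(); a return of -1 means the internal page
         * buffer is full and must be drained early with the loop below.
         */
        do {
            xc_remus_compbuf_set(xch, rctx, chunk, chunk_size);
            rc = xc_remus_compress(xch, rctx);  /* 1: all compressed,
                                                   -1: chunk filled up,
                                                   0: nothing left */
            if (rc == 0)
                break;
            len = xc_remus_get_compbuf_len(xch, rctx);
            /* ... emit XC_SAVE_ID_COMPRESSED_DATA, then len, then the
             * first len bytes of chunk on the wire ... */
        } while (rc < 0);

        xc_remus_pagebuf_reset(xch, rctx);
        return 0;
    }

On the restore side the accumulated chunks are walked with
xc_remus_uncompress(), one call per pfn in the batch, with compbuf_pos
advancing through the buffer.
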
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xenguest.h
> --- a/tools/libxc/xenguest.h    Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xenguest.h    Sat Jun 18 20:51:59 2011 -0700
> @@ -27,6 +27,7 @@
>  #define XCFLAGS_DEBUG     2
>  #define XCFLAGS_HVM       4
>  #define XCFLAGS_STDVGA    8
> +#define XCFLAGS_REMUS_COMPRESS    16
>  #define X86_64_B_SIZE   64
>  #define X86_32_B_SIZE   32
> 
> diff -r 23c068b10923 -r b4974a38d101 tools/libxc/xg_save_restore.h
> --- a/tools/libxc/xg_save_restore.h     Wed Jun 15 16:16:41 2011 +0100
> +++ b/tools/libxc/xg_save_restore.h     Sat Jun 18 20:51:59 2011 -0700
> @@ -134,6 +134,8 @@
>  #define XC_SAVE_ID_HVM_CONSOLE_PFN    -8 /* (HVM-only) */
>  #define XC_SAVE_ID_LAST_CHECKPOINT    -9 /* Commit to restoring after 
> completion of current iteration. */
>  #define XC_SAVE_ID_HVM_ACPI_IOPORTS_LOCATION -10
> +#define XC_SAVE_ID_COMPRESSED_DATA    -11 /* Marker to indicate arrival of 
> compressed data */
> +#define XC_SAVE_ID_ENABLE_COMPRESSION -12 /* Marker to enable compression 
> logic at receiver side */
> 
>  /*
>  ** We process save/restore/migrate in batches of pages; the below



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
