Index: trunk/tools/libxc/xc_linux_save.c
===================================================================
--- trunk/tools/libxc/xc_linux_save.c (revision 10373)
+++ trunk/tools/libxc/xc_linux_save.c (working copy)
@@ -26,7 +26,6 @@
 #define DEF_MAX_ITERS 29 /* limit us to 30 times round loop */
 #define DEF_MAX_FACTOR 3 /* never send more than 3x nr_pfns */
 
-
 /* max mfn of the whole machine */
 static unsigned long max_mfn;
 
@@ -171,7 +170,58 @@
         (new->tv_usec - old->tv_usec);
 }
 
+/*
+ * Write len bytes from buffer to fd with write(), keeping page-cache usage
+ * bounded when the save is not live.
+ *
+ * On Linux, for a non-live save, the bytes actually written are accumulated;
+ * once they reach MAX_PAGECACHE_USAGE pages the kernel is advised
+ * (POSIX_FADV_DONTNEED) to drop cached data for the file from offset 0 up to
+ * the last page boundary at or below the current file offset.  The advice is
+ * best effort: a failure is only logged and errno is restored afterwards.
+ *
+ * Returns the result of write().
+ */
+static int noncached_write(int fd, int live, void *buffer, int len)
+{
+    static int write_count = 0;
+    int rc = write(fd, buffer, len);
+
+// TODO: Add support for other OS's
+#ifdef __linux__
+    if (!live) {
+        /* Count only the bytes that were really written. */
+        if (rc > 0)
+            write_count += rc;
+
+        if (write_count >= MAX_PAGECACHE_USAGE*PAGE_SIZE) {
+            int serrno = errno;
+            int frc;
+            off_t cur = lseek(fd, 0, SEEK_CUR);
+
+            if (cur != (off_t)-1) {
+                /* Round down to a page boundary; the cast keeps the mask
+                   as wide as off_t so large offsets are not truncated. */
+                cur &= ~((off_t)PAGE_SIZE - 1);
+
+                /* posix_fadvise64() returns the error number directly
+                   and does not set errno. */
+                frc = posix_fadvise64(fd, 0, cur, POSIX_FADV_DONTNEED);
+                if (frc != 0) {
+                    DPRINTF("Failed to discard cache: %s", strerror(frc));
+                }
+            }
+
+            write_count = 0;
+
+            errno = serrno;
+        }
+    }
+#endif
+    return rc;
+}
+
 
 #ifdef ADAPTIVE_SAVE
 
 
@@ -204,7 +254,7 @@
 }
 
 
-static int ratewrite(int io_fd, void *buf, int n)
+static int ratewrite(int io_fd, int live, void *buf, int n)
 {
     static int budget = 0;
     static int burst_time_us = -1;
@@ -214,7 +264,7 @@
     long long delta;
 
     if (START_MBIT_RATE == 0)
-        return write(io_fd, buf, n);
+        return noncached_write(io_fd, live, buf, n);
 
     budget -= n;
     if (budget < 0) {
@@ -250,13 +300,13 @@
             }
         }
     }
-    return write(io_fd, buf, n);
+    return noncached_write(io_fd, live, buf, n);
 }
 
 #else /* ! ADAPTIVE SAVE */
 
 #define RATE_IS_MAX() (0)
-#define ratewrite(_io_fd, _buf, _n) write((_io_fd), (_buf), (_n))
+#define ratewrite(_io_fd, _live, _buf, _n) noncached_write((_io_fd), (_live), (_buf), (_n))
 #define initialize_mbit_rate()
 
 #endif
@@ -1069,7 +1119,7 @@
                     if(race && !live)
                         goto out;
 
-                    if (ratewrite(io_fd, page, PAGE_SIZE) != PAGE_SIZE) {
+                    if (ratewrite(io_fd, live, page, PAGE_SIZE) != PAGE_SIZE) {
                         ERROR("Error when writing to state file (4)"
                               " (errno %d)", errno);
                         goto out;
@@ -1078,7 +1128,7 @@
                 } else {
 
                     /* We have a normal page: just write it directly. */
-                    if (ratewrite(io_fd, spage, PAGE_SIZE) != PAGE_SIZE) {
+                    if (ratewrite(io_fd, live, spage, PAGE_SIZE) != PAGE_SIZE) {
                         ERROR("Error when writing to state file (5)"
                               " (errno %d)", errno);
                         goto out;
@@ -1248,6 +1298,22 @@
             DPRINTF("Warning - couldn't disable shadow mode");
         }
     }
+    else {
+        // flush last write and discard cache for file
+        if (fsync(io_fd) < 0) {
+            DPRINTF("Failed to flush file: %s", strerror(errno));
+        }
+//TODO: Add support for other OS's
+#ifdef __linux__
+        {
+            /* posix_fadvise64() returns the error number; errno is not set */
+            int frc = posix_fadvise64(io_fd, 0, 0, POSIX_FADV_DONTNEED);
+            if (frc != 0) {
+                DPRINTF("Failed to discard cache: %s", strerror(frc));
+            }
+        }
+#endif
+    }
 
     if (live_shinfo)
         munmap(live_shinfo, PAGE_SIZE);
Index: trunk/tools/libxc/Makefile
===================================================================
--- trunk/tools/libxc/Makefile (revision 10373)
+++ trunk/tools/libxc/Makefile (working copy)
@@ -67,6 +67,7 @@
 CFLAGS += -Werror -Wmissing-prototypes
 CFLAGS += -fno-strict-aliasing
 CFLAGS += $(INCLUDES) -I.
+CFLAGS += -D_GNU_SOURCE
 
 # Define this to make it possible to run valgrind on code linked with these
 # libraries.
Index: trunk/tools/libxc/xc_private.h
===================================================================
--- trunk/tools/libxc/xc_private.h (revision 10373)
+++ trunk/tools/libxc/xc_private.h (working copy)
@@ -41,6 +41,13 @@
 #define INFO 1
 #define PROGRESS 0
 
+/*
+** Define max dirty page cache to permit during save/restore -- need to balance
+** keeping cache usage down with CPU impact of invalidating too often.
+** Expressed in pages: 4*1024 pages, i.e. 16MB with 4kB pages.
+*/
+#define MAX_PAGECACHE_USAGE (4*1024)
+
 #if INFO
 #define IPRINTF(_f, _a...) printf(_f , ## _a)
 #else
Index: trunk/tools/libxc/xc_core.c
===================================================================
--- trunk/tools/libxc/xc_core.c (revision 10373)
+++ trunk/tools/libxc/xc_core.c (working copy)
@@ -129,12 +129,46 @@
     int fd;
 };
 
+/*
+ * Optionally flush fd to disk, then ask the kernel to drop the file's pages
+ * from the page cache.
+ *
+ * If flush is non-zero, fsync() is called first.  On Linux the whole file
+ * (offset 0, length 0) is then advised POSIX_FADV_DONTNEED.
+ *
+ * Returns 0 on success, or a negative error number on failure.
+ */
+static int discard_file_cache(int fd, int flush)
+{
+    int err;
+
+    if (flush && fsync(fd) < 0) {
+        /* Latch errno now: logging may overwrite it before we return. */
+        err = errno;
+        PERROR("Failed to flush file: %s", strerror(err));
+        return -err;
+    }
+
+// TODO: Add support for other OS's
+#ifdef __linux__
+    /* posix_fadvise64() returns the error number and does not set errno. */
+    err = posix_fadvise64(fd, 0, 0, POSIX_FADV_DONTNEED);
+    if (err != 0) {
+        errno = err; /* in case PERROR reports errno -- TODO confirm */
+        PERROR("Failed to discard cache: %s", strerror(err));
+        return -err;
+    }
+#endif
+
+    return 0;
+}
+
 /* Callback routine for writing to a local dump file. */
 static int local_file_dump(void *args, char *buffer, unsigned int length)
 {
     struct dump_args *da = args;
     int bytes, offset;
 
     for ( offset = 0; offset < length; offset += bytes )
     {
         bytes = write(da->fd, &buffer[offset], length-offset);
@@ -145,6 +179,12 @@
         }
     }
 
+    if (length >= DUMP_INCREMENT*PAGE_SIZE) {
+        // Now dumping pages -- make sure we discard clean pages from
+        // the cache after each write
+        discard_file_cache(da->fd, 0 /* no flush */);
+    }
+
     return 0;
 }
 
@@ -165,6 +205,9 @@
     sts = xc_domain_dumpcore_via_callback(
         xc_handle, domid, &da, &local_file_dump);
 
+    /* flush and discard any remaining portion of the file from cache */
+    discard_file_cache(da.fd, 1/* flush first*/);
+
     close(da.fd);
 
     return sts;
Index: trunk/tools/libxc/xc_linux_restore.c
===================================================================
--- trunk/tools/libxc/xc_linux_restore.c (revision 10373)
+++ trunk/tools/libxc/xc_linux_restore.c (working copy)
@@ -143,7 +143,7 @@
                      unsigned int console_evtchn, unsigned long *console_mfn)
 {
     DECLARE_DOMCTL;
-    int rc = 1, i, n, pae_extended_cr3 = 0;
+    int rc = 1, i, n, m, pae_extended_cr3 = 0;
     unsigned long mfn, pfn;
     unsigned int prev_pc, this_pc;
     int verify = 0;
@@ -330,7 +330,7 @@
      */
     prev_pc = 0;
 
-    n = 0;
+    n = m = 0;
     while (1) {
 
         int j, nr_mfns = 0;
@@ -529,6 +529,26 @@
 
         munmap(region_base, j*PAGE_SIZE);
         n+= j; /* crude stats */
+
+        /* discard cache for portion of file read so far up to last page boundary
+           every 16MB (4k pages) or so */
+        m += j;
+        if (m > MAX_PAGECACHE_USAGE) {
+// TODO: Add support for other OS's
+#ifdef __linux__
+            int frc;
+            off_t cur = lseek(io_fd, 0, SEEK_CUR);
+            if (cur != (off_t)-1) {
+                cur &= ~((off_t)PAGE_SIZE - 1);
+                /* posix_fadvise64() returns the error number; errno is not set */
+                frc = posix_fadvise64(io_fd, 0, cur, POSIX_FADV_DONTNEED);
+                if (frc != 0) {
+                    DPRINTF("Failed to discard cache: %s\n", strerror(frc));
+                }
+            }
+#endif
+            m = 0;
+        }
     }
 
     /*
@@ -796,15 +816,15 @@ if (pfn >= max_pfn) { ERROR("PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", - pfn, max_pfn, pfn_type[pfn]); + pfn, max_pfn, pfn_type[pfn]); goto out; } if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != ((unsigned long)pt_levels<