WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] [xen-unstable] User-land tool for memory paging.

To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] [xen-unstable] User-land tool for memory paging.
From: Xen patchbot-unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Wed, 16 Dec 2009 22:40:47 -0800
Delivery-date: Wed, 16 Dec 2009 22:40:54 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1261031275 0
# Node ID 9b344d919ee4c54e33d6f14c44a9fa1856aa6b7d
# Parent  0b56bc4f3b21f37df8b3422da9078442492d9fef
User-land tool for memory paging.

This tool will page out the specified number of pages from the specified
domain. When a paged out page is accessed, Xen will issue a request and
notify the tool over an event channel. The tool will process the request,
page the page in, and notify Xen.

The current (default) policy tracks the 1024 most recently paged in pages
and will not choose to evict any of those. This is done with the assumption
that if a page is accessed, it is likely to be accessed again soon.

Signed-off-by: Patrick Colp <Patrick.Colp@xxxxxxxxxx>
---
 .hgignore                        |    1 
 tools/Makefile                   |    1 
 tools/xenpaging/Makefile         |   45 ++
 tools/xenpaging/bitops.h         |  448 ++++++++++++++++++++++++++++
 tools/xenpaging/file_ops.c       |   83 +++++
 tools/xenpaging/file_ops.h       |   42 ++
 tools/xenpaging/mem_event.h      |   63 ++++
 tools/xenpaging/policy.h         |   48 +++
 tools/xenpaging/policy_default.c |  103 ++++++
 tools/xenpaging/spinlock.h       |   69 ++++
 tools/xenpaging/xc.c             |  131 ++++++++
 tools/xenpaging/xc.h             |   73 ++++
 tools/xenpaging/xenpaging.c      |  612 +++++++++++++++++++++++++++++++++++++++
 tools/xenpaging/xenpaging.h      |   72 ++++
 14 files changed, 1791 insertions(+)

diff -r 0b56bc4f3b21 -r 9b344d919ee4 .hgignore
--- a/.hgignore Thu Dec 17 06:27:55 2009 +0000
+++ b/.hgignore Thu Dec 17 06:27:55 2009 +0000
@@ -238,6 +238,7 @@
 ^tools/xenfb/vncfb$
 ^tools/xenmon/xentrace_setmask$
 ^tools/xenmon/xenbaked$
+^tools/xenpaging/xenpaging$
 ^tools/xenpmd/xenpmd$
 ^tools/xenstat/xentop/xentop$
 ^tools/xenstore/testsuite/tmp/.*$
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/Makefile
--- a/tools/Makefile    Thu Dec 17 06:27:55 2009 +0000
+++ b/tools/Makefile    Thu Dec 17 06:27:55 2009 +0000
@@ -34,6 +34,7 @@ SUBDIRS-y += xenpmd
 SUBDIRS-y += xenpmd
 SUBDIRS-y += libxl
 SUBDIRS-y += remus
+SUBDIRS-y += xenpaging
 
 # These don't cross-compile
 ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/Makefile  Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,45 @@
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CFLAGS   += -I $(XEN_XC)
+CFLAGS   += -I ./
+CFLAGS   += $(CFLAGS_libxenctrl) $(CFLAGS_libxenstore)
+LDFLAGS  += $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore)
+
+POLICY    = default
+
+SRCS     :=
+SRCS     += file_ops.c xc.c xenpaging.c policy_$(POLICY).c
+
+CFLAGS   += -Werror
+CFLAGS   += -Wno-unused
+CFLAGS   += -g
+
+#CFLAGS   += -Wl,-rpath,..
+CFLAGS   += -Wp,-MD,.$(@F).d
+DEPS     = .*.d
+
+#LDFLAGS  += $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest)
+
+OBJS     = $(SRCS:.c=.o)
+IBINS    = xenpaging
+
+all: $(IBINS)
+
+xenpaging: $(OBJS)
+       $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+
+install: all
+       $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
+       $(INSTALL_PROG) $(IBINS) $(DESTDIR)$(SBINDIR)
+
+clean:
+       rm -f *.o *~ $(DEPS) xen TAGS $(IBINS) $(LIB)
+
+.PHONY: clean install
+
+.PHONY: TAGS
+TAGS:
+       etags -t $(SRCS) *.h
+
+-include $(DEPS)
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/bitops.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/bitops.h  Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,448 @@
+#ifndef _X86_BITOPS_H
+#define _X86_BITOPS_H
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ */
+
+//#include <xen/config.h>
+
+#ifdef CONFIG_SMP  /* NOTE(review): not defined anywhere in this header */
+#define LOCK_PREFIX "lock ; "
+#else              /* without CONFIG_SMP the ops below get no lock prefix */
+#define LOCK_PREFIX ""
+#endif
+
+/*
+ * We specify the memory operand as both input and output because the memory
+ * operand is both read from and written to. Since the operand is in fact a
+ * word array, we also specify "memory" in the clobbers list to indicate that
+ * words other than the one directly addressed by the memory operand may be
+ * modified. We don't use "+m" because the gcc manual says that it should be
+ * used only when the constraint allows the operand to reside in a register.
+ */
+
+#define ADDR (*(volatile long *) addr) /* relies on a local named 'addr' */
+#define CONST_ADDR (*(const volatile long *) addr)
+/* Wrappers below call __bitop_bad_size() when *addr is narrower than 4 bytes. */
+extern void __bitop_bad_size(void);
+#define bitop_bad_size(addr) (sizeof(*(addr)) < 4)
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Atomic only when LOCK_PREFIX expands to "lock ; ".  See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void set_bit(int nr, volatile void *addr)
+{
+    asm volatile (
+        LOCK_PREFIX
+        "btsl %1,%0"
+        : "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+}
+#define set_bit(nr, addr) ({                            \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    set_bit(nr, addr);                                  \
+})
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this never emits LOCK_PREFIX, so it is non-atomic.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile void *addr)
+{
+    asm volatile (
+        "btsl %1,%0"
+        : "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+}
+#define __set_bit(nr, addr) ({                          \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    __set_bit(nr, addr);                                \
+})
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * Atomic only when LOCK_PREFIX expands to "lock ; ".  If it is used for
+ * locking purposes, call smp_mb__before_clear_bit() and/or
+ * smp_mb__after_clear_bit() around it; note that both are defined as
+ * no-ops further down in this header.
+ */
+static inline void clear_bit(int nr, volatile void *addr)
+{
+    asm volatile (
+        LOCK_PREFIX
+        "btrl %1,%0"
+        : "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+}
+#define clear_bit(nr, addr) ({                          \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    clear_bit(nr, addr);                                \
+})
+
+/**
+ * __clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * Unlike clear_bit(), this never emits LOCK_PREFIX, so it is non-atomic.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __clear_bit(int nr, volatile void *addr)
+{
+    asm volatile (
+        "btrl %1,%0"
+        : "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+}
+#define __clear_bit(nr, addr) ({                        \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    __clear_bit(nr, addr);                              \
+})
+/* Barrier hooks named in the clear_bit() comment; here they expand to no-ops. */
+#define smp_mb__before_clear_bit() ((void)0)
+#define smp_mb__after_clear_bit()  ((void)0)
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to toggle
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this never emits LOCK_PREFIX, so it is non-atomic.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile void *addr)
+{
+    asm volatile (
+        "btcl %1,%0"
+        : "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+}
+#define __change_bit(nr, addr) ({                       \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    __change_bit(nr, addr);                             \
+})
+
+/**
+ * change_bit - Toggle a bit in memory
+ * @nr: Bit to toggle
+ * @addr: Address to start counting from
+ *
+ * Atomic only when LOCK_PREFIX expands to "lock ; ".
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static inline void change_bit(int nr, volatile void *addr)
+{
+    asm volatile (
+        LOCK_PREFIX
+        "btcl %1,%0"
+        : "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+}
+#define change_bit(nr, addr) ({                         \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    change_bit(nr, addr);                               \
+})
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * Atomic only when LOCK_PREFIX expands to "lock ; ".  Returns 0 if the bit
+ * was clear and -1 (not 1) if it was set: sbbl maps the carry flag to 0/-1.
+ */
+static inline int test_and_set_bit(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        LOCK_PREFIX
+        "btsl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+    return oldbit;
+}
+#define test_and_set_bit(nr, addr) ({                   \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    test_and_set_bit(nr, addr);                         \
+})
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * Returns 0 if the bit was clear and -1 (not 1) if it was set.
+ * This operation is non-atomic: if two instances race, one can appear to
+ * succeed but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        "btsl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+    return oldbit;
+}
+#define __test_and_set_bit(nr, addr) ({                 \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    __test_and_set_bit(nr, addr);                       \
+})
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * Atomic only when LOCK_PREFIX expands to "lock ; ".  Returns 0 if the bit
+ * was clear and -1 (not 1) if it was set: sbbl maps the carry flag to 0/-1.
+ */
+static inline int test_and_clear_bit(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        LOCK_PREFIX
+        "btrl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+    return oldbit;
+}
+#define test_and_clear_bit(nr, addr) ({                 \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    test_and_clear_bit(nr, addr);                       \
+})
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * Returns 0 if the bit was clear and -1 (not 1) if it was set.
+ * This operation is non-atomic: if two instances race, one can appear to
+ * succeed but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        "btrl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+    return oldbit;
+}
+#define __test_and_clear_bit(nr, addr) ({               \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    __test_and_clear_bit(nr, addr);                     \
+})
+
+/* Toggle bit @nr; returns 0 or -1 for its old value. WARNING: non atomic! */
+static inline int __test_and_change_bit(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        "btcl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+    return oldbit;
+}
+#define __test_and_change_bit(nr, addr) ({              \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    __test_and_change_bit(nr, addr);                    \
+})
+
+/**
+ * test_and_change_bit - Toggle a bit and return its old value
+ * @nr: Bit to toggle
+ * @addr: Address to count from
+ *
+ * Atomic only when LOCK_PREFIX expands to "lock ; ".  Returns 0 if the bit
+ * was clear and -1 (not 1) if it was set: sbbl maps the carry flag to 0/-1.
+ */
+static inline int test_and_change_bit(int nr, volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        LOCK_PREFIX
+        "btcl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR)
+        : "Ir" (nr), "m" (ADDR) : "memory");
+    return oldbit;
+}
+#define test_and_change_bit(nr, addr) ({                \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    test_and_change_bit(nr, addr);                      \
+})
+/* Test bit nr by indexing 32-bit words in plain C; returns 1 if set, else 0. */
+static inline int constant_test_bit(int nr, const volatile void *addr)
+{
+    return ((1U << (nr & 31)) &
+            (((const volatile unsigned int *)addr)[nr >> 5])) != 0;
+}
+/* Test bit nr with btl; returns -1 (not 1) if set, else 0, via sbbl. */
+static inline int variable_test_bit(int nr, const volatile void *addr)
+{
+    int oldbit;
+
+    asm volatile (
+        "btl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit)
+        : "m" (CONST_ADDR), "Ir" (nr) : "memory" );
+    return oldbit;
+}
+/* Non-zero if bit nr is set; the value is 1 or -1 depending on the path taken. */
+#define test_bit(nr, addr) ({                           \
+    if ( bitop_bad_size(addr) ) __bitop_bad_size();     \
+    (__builtin_constant_p(nr) ?                         \
+     constant_test_bit((nr),(addr)) :                   \
+     variable_test_bit((nr),(addr)));                   \
+})
+/* Out-of-line scanners used by the find_*() macros below; not defined here. */
+extern unsigned int __find_first_bit(
+    const unsigned long *addr, unsigned int size);
+extern unsigned int __find_next_bit(
+    const unsigned long *addr, unsigned int size, unsigned int offset);
+extern unsigned int __find_first_zero_bit(
+    const unsigned long *addr, unsigned int size);
+extern unsigned int __find_next_zero_bit(
+    const unsigned long *addr, unsigned int size, unsigned int offset);
+/* Index of the lowest set bit of val, or max when val is zero (bsf + cmovz). */
+static inline unsigned int __scanbit(unsigned long val, unsigned long max)
+{
+    asm ( "bsf %1,%0 ; cmovz %2,%0" : "=&r" (val) : "r" (val), "r" (max) );
+    return (unsigned int)val;
+}
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ * A constant @size <= BITS_PER_LONG is scanned inline with __scanbit().
+ * Returns the bit-number of the first set bit, not the number of the byte
+ * containing a bit.
+ */
+#define find_first_bit(addr,size)                               \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
+  (__scanbit(*(const unsigned long *)addr, size)) :             \
+  __find_first_bit(addr,size)))
+
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The maximum size to search
+ * @off: The bitnumber to start searching at (inline path adds it back)
+ */
+#define find_next_bit(addr,size,off)                                     \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                \
+  ((off) + (__scanbit((*(const unsigned long *)addr) >> (off), size))) : \
+  __find_next_bit(addr,size,off)))
+
+/**
+ * find_first_zero_bit - find the first zero bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum size to search
+ * A constant @size <= BITS_PER_LONG is scanned inline on the inverted word.
+ * Returns the bit-number of the first zero bit, not the number of the byte
+ * containing a bit.
+ */
+#define find_first_zero_bit(addr,size)                          \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?       \
+  (__scanbit(~*(const unsigned long *)addr, size)) :            \
+  __find_first_zero_bit(addr,size)))
+
+/**
+ * find_next_zero_bit - find the next zero bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The maximum size to search
+ * @off: The bitnumber to start searching at (inline path adds it back)
+ */
+#define find_next_zero_bit(addr,size,off)                                   \
+((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ?                   \
+  ((off)+(__scanbit(~(((*(const unsigned long *)addr)) >> (off)), size))) : \
+  __find_next_zero_bit(addr,size,off)))
+
+
+/**
+ * find_first_set_bit - find the first set bit in @word
+ * @word: the word to search
+ *
+ * Returns the bit-number of the first set bit. The input must *not* be zero.
+ */
+static inline unsigned int find_first_set_bit(unsigned long word)
+{
+    asm ( "bsf %1,%0" : "=r" (word) : "r" (word) );
+    return (unsigned int)word;
+}
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ * NOTE: compiled out by the #if 0 below, so this header does not define ffs.
+ * This is defined the same way as the libc and compiler builtin ffs routines.
+ */
+#if 0
+static inline int ffs(unsigned long x)
+{
+    long r;
+
+    asm ( "bsf %1,%0\n\t"
+          "jnz 1f\n\t"
+          "mov $-1,%0\n"
+          "1:" : "=r" (r) : "rm" (x));
+    return (int)r+1;
+}
+#endif
+
+/**
+ * fls - find last bit set
+ * @x: the word to search
+ *
+ * Returns 1 + the index of the highest set bit, or 0 when @x is zero.
+ */
+static inline int fls(unsigned long x)
+{
+    long r;
+
+    asm ( "bsr %1,%0\n\t"
+          "jnz 1f\n\t"
+          "mov $-1,%0\n"
+          "1:" : "=r" (r) : "rm" (x));
+    return (int)r+1;
+}
+
+/**
+ * hweightN - returns the hamming weight of a N-bit word
+ * @x: the word to weigh
+ * NOTE(review): the generic_hweightN() helpers are not defined in this header.
+ * The Hamming Weight of a number is the total number of bits set in it.
+ */
+#define hweight64(x) generic_hweight64(x)
+#define hweight32(x) generic_hweight32(x)
+#define hweight16(x) generic_hweight16(x)
+#define hweight8(x) generic_hweight8(x)
+
+#endif /* _X86_BITOPS_H */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/file_ops.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/file_ops.c        Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,83 @@
+/******************************************************************************
+ * tools/xenpaging/file_ops.c
+ *
+ * Common file operations.
+ *
+ * Copyright (c) 2009 by Citrix (R&D) Ltd. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <unistd.h>
+#include <xc_private.h>
+
+/* Byte offset of page _pfn; the cast widens to off_t before the shift. */
+#define page_offset(_pfn)     (((off_t)(_pfn)) << PAGE_SHIFT)
+
+
+/*
+ * Transfer exactly one page between fd and the buffer at page, at page index
+ * i of the file, using fn (read or write).  Returns 0 or a negative errno.
+ */
+static int file_op(int fd, void *page, int i,
+                   ssize_t (*fn)(int, const void *, size_t))
+{
+    ssize_t bytes;
+    int total = 0;
+
+    /* page_offset() widens to off_t before shifting; check the seek too. */
+    if ( lseek64(fd, page_offset(i), SEEK_SET) < 0 )
+        return -errno;
+
+    while ( total < PAGE_SIZE )
+    {
+        bytes = fn(fd, (char *)page + total, PAGE_SIZE - total);
+        if ( bytes < 0 )
+            return -errno;
+        /* A zero-byte transfer (e.g. EOF) sets no errno: fail explicitly. */
+        if ( bytes == 0 )
+            return -EIO;
+
+        total += bytes;
+    }
+
+    return 0;
+}
+/* Adapter: gives read() the const-buffer signature that file_op() takes. */
+static ssize_t my_read(int fd, const void *buf, size_t count)
+{
+    return read(fd, (void *)buf, count);
+}
+/* Read the page at index i of fd into page; returns file_op()'s result. */
+int read_page(int fd, void *page, int i)
+{
+    return file_op(fd, page, i, &my_read);
+}
+/* Write page to index i of fd; returns file_op()'s result. */
+int write_page(int fd, void *page, int i)
+{
+    return file_op(fd, page, i, &write);
+}
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End: 
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/file_ops.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/file_ops.h        Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * tools/xenpaging/file_ops.h
+ *
+ * Common file operations.
+ *
+ * Copyright (c) 2009 by Citrix (R&D) Ltd. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __FILE_OPS_H__
+#define __FILE_OPS_H__
+
+/* Move one page between fd (at page index i) and the buffer at page. */
+int read_page(int fd, void *page, int i);
+int write_page(int fd, void *page, int i);
+
+
+#endif
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End: 
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/mem_event.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/mem_event.h       Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,63 @@
+/******************************************************************************
+ * tools/xenpaging/mem_event.h
+ *
+ * Memory event structures.
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __XEN_MEM_EVENT_H__
+#define __XEN_MEM_EVENT_H__
+
+
+#include "spinlock.h"
+#include "xc.h"
+#include <xc_private.h>
+
+#include <xen/event_channel.h>
+#include <xen/mem_event.h>
+
+/* Apply the spinlock.h primitives to the ring_lock member of a mem_event_t. */
+#define mem_event_ring_lock_init(_m)  spin_lock_init(&(_m)->ring_lock)
+#define mem_event_ring_lock(_m)       spin_lock(&(_m)->ring_lock)
+#define mem_event_ring_unlock(_m)     spin_unlock(&(_m)->ring_lock)
+
+/* State bundle for one mem_event ring; NOTE(review): users are not shown here. */
+typedef struct mem_event {
+    domid_t domain_id;
+    int xce_handle;
+    int port;
+    mem_event_back_ring_t back_ring;
+    mem_event_shared_page_t *shared_page;
+    void *ring_page;
+    spinlock_t ring_lock;   /* taken through the mem_event_ring_lock() macros */
+} mem_event_t;
+
+
+#endif // __XEN_MEM_EVENT_H__
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/policy.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/policy.h  Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,48 @@
+/******************************************************************************
+ * tools/xenpaging/policy.h
+ *
+ * Xen domain paging policy hooks.
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __XEN_PAGING_POLICY_H__
+#define __XEN_PAGING_POLICY_H__
+
+
+#include "xenpaging.h"
+
+/* Hooks a paging policy implements (policy_$(POLICY).c in the Makefile). */
+int policy_init(xenpaging_t *paging);
+int policy_choose_victim(xenpaging_t *paging, domid_t domain_id,
+                         xenpaging_victim_t *victim);
+void policy_notify_paged_out(domid_t domain_id, unsigned long gfn);
+void policy_notify_paged_in(domid_t domain_id, unsigned long gfn);
+
+#endif // __XEN_PAGING_POLICY_H__
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/policy_default.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/policy_default.c  Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,103 @@
+/******************************************************************************
+ * tools/xenpaging/policy_default.c
+ *
+ * Xen domain paging default policy.
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include "bitops.h"
+#include "xc.h"
+#include "policy.h"
+
+
+#define MRU_SIZE 1024
+/* Must be a power of two: ring slots are selected with & (MRU_SIZE - 1). */
+
+static unsigned long mru[MRU_SIZE];   /* ring of the most recently paged-in gfns */
+static unsigned int i_mru = 0;        /* number of paged-in notifications so far */
+static unsigned long *bitmap;         /* set bit: gfn is not picked as a victim */
+
+/* Set up the bitmap and an empty MRU ring; returns 0 or alloc_bitmap()'s error. */
+int policy_init(xenpaging_t *paging)
+{
+    int i;
+    int rc;
+
+    /* Allocate bitmap for pages not to page out */
+    rc = alloc_bitmap(&bitmap, paging->bitmap_size);
+    if ( rc != 0 )
+        goto out;
+
+    /* Initialise MRU list of paged in pages */
+    for ( i = 0; i < MRU_SIZE; i++ )
+        mru[i] = INVALID_MFN;
+
+    /* Don't page out page 0 */
+    set_bit(0, bitmap);
+
+    rc = 0;
+
+ out:
+    return rc;
+}
+/* Pick a random gfn whose bit in the bitmap is clear; always returns 0. */
+int policy_choose_victim(xenpaging_t *paging, domid_t domain_id,
+                         xenpaging_victim_t *victim)
+{
+    ASSERT(victim != NULL);
+
+    /* Domain to pick on */
+    victim->domain_id = domain_id;
+    /* NOTE(review): the loop below is unbounded if every candidate bit is set. */
+    do
+    {
+        /* Randomly choose a gfn to evict */
+        victim->gfn = rand() % paging->domain_info->max_pages;
+    }
+    while ( test_bit(victim->gfn, bitmap) );
+
+    return 0;
+}
+/* Set gfn's bit so policy_choose_victim() will not select it again. */
+void policy_notify_paged_out(domid_t domain_id, unsigned long gfn)
+{
+    set_bit(gfn, bitmap);
+}
+/* Record gfn in the MRU ring and clear the bit of the entry it overwrites. */
+void policy_notify_paged_in(domid_t domain_id, unsigned long gfn)
+{
+    unsigned long old_gfn = mru[i_mru & (MRU_SIZE - 1)];
+
+    if ( old_gfn != INVALID_MFN )
+        clear_bit(old_gfn, bitmap);
+    /* gfn's own bit is not modified here. */
+    mru[i_mru & (MRU_SIZE - 1)] = gfn;
+    i_mru++;
+}
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/spinlock.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/spinlock.h        Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,69 @@
+/******************************************************************************
+ * tools/xenpaging/spinlock.h
+ *
+ * Spinlock implementation.
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __SPINLOCK_H__
+#define __SPINLOCK_H__
+
+
+#include "bitops.h"
+
+/* Value of an unheld lock; spin_lock()/spin_trylock() mark it held via bit 1. */
+#define SPIN_LOCK_UNLOCKED 0
+
+
+typedef int spinlock_t;
+
+/* Spin until test_and_set_bit() reports that bit 1 was previously clear. */
+static inline void spin_lock(spinlock_t *lock)
+{
+    while ( test_and_set_bit(1, lock) );
+}
+/* Put the lock into the unheld state. */
+static inline void spin_lock_init(spinlock_t *lock)
+{
+    *lock = SPIN_LOCK_UNLOCKED;
+}
+/* Release with a plain store; NOTE(review): no barrier is issued here. */
+static inline void spin_unlock(spinlock_t *lock)
+{
+    *lock = SPIN_LOCK_UNLOCKED;
+}
+/* Make one acquisition attempt; returns non-zero if the lock was taken. */
+static inline int spin_trylock(spinlock_t *lock)
+{
+    return !test_and_set_bit(1, lock);
+}
+
+
+#endif // __SPINLOCK_H__
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/xc.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/xc.c      Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,131 @@
+/******************************************************************************
+ * tools/xenpaging/xc.c
+ *
+ * libxc-type add-ons for paging support.
+ *
+ * Copyright (c) 2009 Citrix (R&D) Ltd. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <errno.h>
+#include <sys/poll.h>
+#include <xc_private.h>
+#include <xg_save_restore.h>
+#include <xs.h>
+#include "xc.h"
+
+
+/*
+ * Make *bitmap refer to a zero-filled bitmap with room for bitmap_size bits.
+ *
+ * If *bitmap is NULL a new buffer is allocated and stored in *bitmap.
+ * Otherwise the existing buffer is reused and only cleared; the caller must
+ * have sized it for at least bitmap_size bits rounded up to whole words.
+ *
+ * The word count is derived from sizeof(unsigned long) and rounded up, so
+ * the number of bytes allocated and the number of bytes cleared always
+ * agree, regardless of how wide unsigned long is on the build target.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int alloc_bitmap(unsigned long **bitmap, unsigned long bitmap_size)
+{
+    const unsigned long bits_per_word = 8 * sizeof(unsigned long);
+    unsigned long nr_words = bitmap_size / bits_per_word;
+
+    /* Round up so a trailing partial word is still backed by memory */
+    if ( bitmap_size % bits_per_word )
+        nr_words++;
+
+    /* Never request zero bytes: calloc(0, n) may legitimately return NULL */
+    if ( nr_words == 0 )
+        nr_words = 1;
+
+    if ( *bitmap == NULL )
+    {
+        /* calloc() hands back zeroed memory, no further clearing needed */
+        *bitmap = calloc(nr_words, sizeof(unsigned long));
+        if ( *bitmap == NULL )
+            return -ENOMEM;
+
+        return 0;
+    }
+
+    memset(*bitmap, 0, nr_words * sizeof(unsigned long));
+
+    return 0;
+}
+
+/*
+ * Write the "flush-cache" command to the xenstore node
+ * /local/domain/0/device-model/<domain_id>/command (presumably read by the
+ * domain's device model, as the function name suggests).
+ *
+ * Returns 0 on success, or -EIO if xenstore could not be opened or the
+ * write failed.
+ */
+int xc_mem_paging_flush_ioemu_cache(domid_t domain_id)
+{
+    static const char command[] = "flush-cache";
+    struct xs_handle *xsh;
+    char path[80];
+    int written;
+
+    snprintf(path, sizeof(path),
+             "/local/domain/0/device-model/%u/command", domain_id);
+
+    xsh = xs_daemon_open();
+    if ( xsh == NULL )
+        return -EIO;
+
+    /* The length argument describes the data written, not the path */
+    written = xs_write(xsh, XBT_NULL, path, command, strlen(command));
+
+    xs_daemon_close(xsh);
+
+    /*
+     * xs_write() reports success as non-zero; callers of this function
+     * treat non-zero as failure, so translate to 0 / negative errno.
+     */
+    return written ? 0 : -EIO;
+}
+
+/*
+ * Poll the event channel descriptor @xce_handle, passing @ms to poll() as
+ * the timeout.
+ *
+ * Returns:
+ *   the pending port, already unmasked, when poll() reports one ready fd;
+ *   -1 when poll() reports anything other than exactly one ready fd;
+ *    0 when poll() was interrupted (EINTR);
+ *   -errno when poll(), xc_evtchn_pending() or xc_evtchn_unmask() fails.
+ */
+int xc_wait_for_event_or_timeout(int xce_handle, unsigned long ms)
+{
+    struct pollfd pfd = { .fd = xce_handle, .events = POLLIN | POLLERR };
+    int nready;
+    int pending;
+
+    nready = poll(&pfd, 1, ms);
+    if ( nready == -1 )
+    {
+        if ( errno != EINTR )
+        {
+            ERROR("Poll exited with an error");
+            return -errno;
+        }
+
+        return 0;
+    }
+
+    /* No descriptor became ready: nothing to read or unmask */
+    if ( nready != 1 )
+        return -1;
+
+    pending = xc_evtchn_pending(xce_handle);
+    if ( pending == -1 )
+    {
+        ERROR("Failed to read port from event channel");
+        return -errno;
+    }
+
+    if ( xc_evtchn_unmask(xce_handle, pending) != 0 )
+    {
+        ERROR("Failed to unmask event channel port");
+        return -errno;
+    }
+
+    return pending;
+}
+
+/*
+ * Wait for an event on @xce_handle with a timeout argument of -1; see
+ * xc_wait_for_event_or_timeout() for the possible return values.
+ */
+int xc_wait_for_event(int xce_handle)
+{
+    /* -1 converts to the largest unsigned long and is forwarded unchanged */
+    const unsigned long no_timeout = -1;
+
+    return xc_wait_for_event_or_timeout(xce_handle, no_timeout);
+}
+
+/*
+ * Fill every field of @platform_info for @domain_id by forwarding to
+ * get_platform_info(); that function's return value is passed back as is.
+ */
+int xc_get_platform_info(int xc_handle, domid_t domain_id,
+                         xc_platform_info_t *platform_info)
+{
+    int rc;
+
+    rc = get_platform_info(xc_handle, domain_id,
+                           &platform_info->max_mfn,
+                           &platform_info->hvirt_start,
+                           &platform_info->pt_levels,
+                           &platform_info->guest_width);
+
+    return rc;
+}
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/xc.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/xc.h      Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,73 @@
+/******************************************************************************
+ * tools/xenpaging/xc.h
+ *
+ * libxc add-ons. 
+ *
+ * Copyright (c) 2009 Citrix (R&D) Ltd. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __XC_H__
+#define __XC_H__
+
+
+#include <xc_private.h>
+#include <xen/mem_event.h>
+
+
+#if 1
+#define ASSERT(_p) \
+    if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
+    __LINE__, __FILE__); *(int*)0=0; }
+#else
+#define ASSERT(_p) ((void)0)
+#endif
+
+
+#define BITS_PER_LONG 64 /* NOTE(review): hard-coded; only correct where unsigned long is 64 bits wide */
+
+
+typedef struct xc_platform_info {   /* filled in by xc_get_platform_info() */
+    unsigned long max_mfn;          /* presumably the highest machine frame number - TODO confirm */
+    unsigned long hvirt_start;      /* presumably the hypervisor virtual start address - TODO confirm */
+    unsigned int  pt_levels;        /* presumably the number of page-table levels - TODO confirm */
+    unsigned int  guest_width;      /* presumably the guest word size - TODO confirm units */
+} xc_platform_info_t;
+
+
+int alloc_bitmap(unsigned long **bitmap, unsigned long bitmap_size);
+
+int xc_mem_paging_flush_ioemu_cache(domid_t domain_id);
+int xc_wait_for_event(int xce_handle);
+int xc_wait_for_event_or_timeout(int xce_handle, unsigned long ms);
+
+int xc_get_platform_info(int xc_handle, domid_t domain_id,
+                         xc_platform_info_t *platform_info);
+
+
+#endif // __XC_H__
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/xenpaging.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/xenpaging.c       Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,612 @@
+/******************************************************************************
+ * tools/xenpaging/xenpaging.c
+ *
+ * Domain paging. 
+ * Copyright (c) 2009 by Citrix (R&D) Ltd. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <stdlib.h>
+#include <xc_private.h>
+
+#include <xen/mem_event.h>
+
+#include "bitops.h"
+#include "spinlock.h"
+#include "file_ops.h"
+#include "xc.h"
+
+#include "policy.h"
+#include "xenpaging.h"
+
+
+#if 0
+#undef DPRINTF
+#define DPRINTF(...) ((void)0)
+#endif
+
+
+/*
+ * Allocate one PAGE_SIZE-aligned buffer of PAGE_SIZE bytes and lock it into
+ * memory with mlock().
+ *
+ * Returns the buffer, or NULL if either the allocation or the mlock() call
+ * fails (the buffer is freed in the latter case).  The contents of the
+ * returned page are not initialised here.
+ */
+static void *init_page(void)
+{
+    void *buffer;
+
+    /* Allocate page-aligned memory */
+    if ( posix_memalign(&buffer, PAGE_SIZE, PAGE_SIZE) != 0 )
+        return NULL;
+
+    /* Lock buffer in memory so it can't be paged out */
+    if ( mlock(buffer, PAGE_SIZE) != 0 )
+    {
+        free(buffer);
+        return NULL;
+    }
+
+    return buffer;
+}
+
+/*
+ * Create and initialise the paging state for @domain_id.
+ *
+ * In order: opens the Xen control interface, allocates the locked shared
+ * and ring pages, initialises the back ring and its lock, enables
+ * mem_event in Xen, opens and binds the event channel, fetches platform
+ * and domain info, allocates the paged-out bitmap and initialises the
+ * policy.
+ *
+ * Returns the new xenpaging_t, or NULL on failure.  On failure everything
+ * acquired up to that point is released again.
+ */
+xenpaging_t *xenpaging_init(domid_t domain_id)
+{
+    xenpaging_t *paging;
+    int mem_event_enabled = 0;
+    int pages_in_use = 0;
+    int rc;
+
+    DPRINTF("xenpaging init\n");
+
+    /* Allocate zeroed state so unset pointers are NULL on the error path */
+    paging = calloc(1, sizeof(*paging));
+    if ( paging == NULL )
+    {
+        ERROR("Error allocating memory for paging state");
+        return NULL;
+    }
+
+    /* Mark both handles as not open; 0 could be a valid handle value */
+    paging->xc_handle = -1;
+    paging->mem_event.xce_handle = -1;
+
+    /* Open connection to xen */
+    paging->xc_handle = xc_interface_open();
+    if ( paging->xc_handle < 0 )
+    {
+        ERROR("Failed to open connection to Xen");
+        goto err;
+    }
+
+    /* Set domain id */
+    paging->mem_event.domain_id = domain_id;
+
+    /* Initialise shared page */
+    paging->mem_event.shared_page = init_page();
+    if ( paging->mem_event.shared_page == NULL )
+    {
+        ERROR("Error initialising shared page");
+        goto err;
+    }
+
+    /* Initialise ring page */
+    paging->mem_event.ring_page = init_page();
+    if ( paging->mem_event.ring_page == NULL )
+    {
+        ERROR("Error initialising ring page");
+        goto err;
+    }
+
+    /* Initialise ring */
+    SHARED_RING_INIT((mem_event_sring_t *)paging->mem_event.ring_page);
+    BACK_RING_INIT(&paging->mem_event.back_ring,
+                   (mem_event_sring_t *)paging->mem_event.ring_page,
+                   PAGE_SIZE);
+
+    /* Initialise lock */
+    mem_event_ring_lock_init(&paging->mem_event);
+
+    /* Initialise Xen */
+    rc = xc_mem_event_enable(paging->xc_handle, paging->mem_event.domain_id,
+                             paging->mem_event.shared_page,
+                             paging->mem_event.ring_page);
+    if ( rc != 0 )
+    {
+        ERROR("Error enabling mem_event in Xen");
+        goto err;
+    }
+    mem_event_enabled = 1;
+
+    /* Open event channel */
+    paging->mem_event.xce_handle = xc_evtchn_open();
+    if ( paging->mem_event.xce_handle < 0 )
+    {
+        ERROR("Failed to open event channel");
+        goto err;
+    }
+
+    /* Bind event notification */
+    rc = xc_evtchn_bind_interdomain(paging->mem_event.xce_handle,
+                                    paging->mem_event.domain_id,
+                                    paging->mem_event.shared_page->port);
+    if ( rc < 0 )
+    {
+        ERROR("Failed to bind event channel");
+        goto err;
+    }
+
+    paging->mem_event.port = rc;
+
+    /* Get platform info */
+    paging->platform_info = malloc(sizeof(xc_platform_info_t));
+    if ( paging->platform_info == NULL )
+    {
+        ERROR("Error allocating memory for platform info");
+        goto err;
+    }
+
+    /* A return value of 1 is what this code treats as success */
+    rc = xc_get_platform_info(paging->xc_handle, domain_id,
+                              paging->platform_info);
+    if ( rc != 1 )
+    {
+        ERROR("Error getting platform info");
+        goto err;
+    }
+
+    /* Get domaininfo */
+    paging->domain_info = malloc(sizeof(xc_domaininfo_t));
+    if ( paging->domain_info == NULL )
+    {
+        ERROR("Error allocating memory for domain info");
+        goto err;
+    }
+
+    /* Exactly one entry was requested, so exactly one must come back */
+    rc = xc_domain_getinfolist(paging->xc_handle, domain_id, 1,
+                               paging->domain_info);
+    if ( rc != 1 )
+    {
+        ERROR("Error getting domain info");
+        goto err;
+    }
+
+    /* Allocate bitmap for tracking pages that have been paged out */
+    paging->bitmap_size = (paging->domain_info->max_pages + BITS_PER_LONG) &
+                          ~(BITS_PER_LONG - 1);
+
+    rc = alloc_bitmap(&paging->bitmap, paging->bitmap_size);
+    if ( rc != 0 )
+    {
+        ERROR("Error allocating bitmap");
+        goto err;
+    }
+    DPRINTF("max_pages = %lx\n", paging->domain_info->max_pages);
+
+    /* Initialise policy */
+    rc = policy_init(paging);
+    if ( rc != 0 )
+    {
+        ERROR("Error initialising policy");
+        goto err;
+    }
+
+    return paging;
+
+ err:
+    /* Undo in reverse order whatever was set up before the failure */
+    if ( paging->mem_event.xce_handle >= 0 )
+        xc_evtchn_close(paging->mem_event.xce_handle);
+
+    if ( mem_event_enabled &&
+         xc_mem_event_disable(paging->xc_handle,
+                              paging->mem_event.domain_id) != 0 )
+    {
+        /* Xen may still reference the pages, so do not hand them back */
+        ERROR("Error tearing down domain paging in xen");
+        pages_in_use = 1;
+    }
+
+    if ( paging->xc_handle >= 0 )
+        xc_interface_close(paging->xc_handle);
+
+    if ( !pages_in_use )
+    {
+        if ( paging->mem_event.ring_page != NULL )
+        {
+            munlock(paging->mem_event.ring_page, PAGE_SIZE);
+            free(paging->mem_event.ring_page);
+        }
+        if ( paging->mem_event.shared_page != NULL )
+        {
+            munlock(paging->mem_event.shared_page, PAGE_SIZE);
+            free(paging->mem_event.shared_page);
+        }
+    }
+
+    free(paging->bitmap);
+    free(paging->domain_info);
+    free(paging->platform_info);
+    free(paging);
+
+    return NULL;
+}
+
+/*
+ * Shut paging down for the domain described by @paging: disable mem_event
+ * in Xen, unbind the event port, close the event channel and close the Xen
+ * control interface, in that order.
+ *
+ * The sequence stops at the first step that fails.  Returns 0 when every
+ * step succeeded and -1 otherwise.  No memory is freed here.
+ */
+int xenpaging_teardown(xenpaging_t *paging)
+{
+    /* Stop paging in the hypervisor first */
+    if ( xc_mem_event_disable(paging->xc_handle,
+                              paging->mem_event.domain_id) != 0 )
+    {
+        ERROR("Error tearing down domain paging in xen");
+        return -1;
+    }
+
+    /* Release the bound event channel port */
+    if ( xc_evtchn_unbind(paging->mem_event.xce_handle,
+                          paging->mem_event.port) != 0 )
+    {
+        ERROR("Error unbinding event port");
+        return -1;
+    }
+    paging->mem_event.port = -1;
+
+    /* Close the event channel handle */
+    if ( xc_evtchn_close(paging->mem_event.xce_handle) != 0 )
+    {
+        ERROR("Error closing event channel");
+        return -1;
+    }
+    paging->mem_event.xce_handle = -1;
+
+    /* Finally drop the connection to Xen */
+    if ( xc_interface_close(paging->xc_handle) != 0 )
+    {
+        ERROR("Error closing connection to xen");
+        return -1;
+    }
+    paging->xc_handle = -1;
+
+    return 0;
+}
+
+/*
+ * Copy the request at the back ring's consumer index into @req, advance
+ * the consumer index by one and set the shared ring's req_event to the
+ * index after that.  All of this happens with the ring lock held.
+ *
+ * Always returns 0.
+ */
+static int get_request(mem_event_t *mem_event, mem_event_request_t *req)
+{
+    mem_event_back_ring_t *ring;
+    RING_IDX slot;
+
+    mem_event_ring_lock(mem_event);
+
+    ring = &mem_event->back_ring;
+    slot = ring->req_cons;
+
+    /* Take a private copy of the request */
+    memcpy(req, RING_GET_REQUEST(ring, slot), sizeof(*req));
+
+    /* Consume the slot and set req_event one past the new consumer index */
+    ring->req_cons = slot + 1;
+    ring->sring->req_event = slot + 2;
+
+    mem_event_ring_unlock(mem_event);
+
+    return 0;
+}
+
+/*
+ * Copy @rsp into the back ring at the private response producer index,
+ * advance that index by one and push the responses with
+ * RING_PUSH_RESPONSES().  All of this happens with the ring lock held.
+ *
+ * Always returns 0.
+ */
+static int put_response(mem_event_t *mem_event, mem_event_response_t *rsp)
+{
+    mem_event_back_ring_t *ring;
+    RING_IDX slot;
+
+    mem_event_ring_lock(mem_event);
+
+    ring = &mem_event->back_ring;
+    slot = ring->rsp_prod_pvt;
+
+    /* Place the response in the next free slot */
+    memcpy(RING_GET_RESPONSE(ring, slot), rsp, sizeof(*rsp));
+
+    /* Publish it */
+    ring->rsp_prod_pvt = slot + 1;
+    RING_PUSH_RESPONSES(ring);
+
+    mem_event_ring_unlock(mem_event);
+
+    return 0;
+}
+
+/*
+ * Page out the guest frame described by @victim.
+ *
+ * The frame is mapped, written to slot @i of the paging file @fd with
+ * write_page(), zeroed and unmapped; Xen is then told to evict it and the
+ * policy is notified.
+ *
+ * Returns 0 on success, -EFAULT if the frame cannot be mapped, or the
+ * non-zero result of write_page() / xc_mem_paging_evict().  The mapping is
+ * released on every path.
+ */
+int xenpaging_evict_page(xenpaging_t *paging, xenpaging_victim_t *victim,
+                         int fd, int i)
+{
+    void *page;
+    unsigned long gfn;
+    int ret;
+
+    /* Map page; work on a copy because the gfn is inspected afterwards */
+    gfn = victim->gfn;
+    page = xc_map_foreign_batch(paging->xc_handle, victim->domain_id,
+                                PROT_READ | PROT_WRITE, &gfn, 1);
+    if ( page == NULL )
+    {
+        ERROR("Error mapping page: page is null");
+        return -EFAULT;
+    }
+
+    /* Check that the page exists */
+    if ( (gfn & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB )
+    {
+        ERROR("Error mapping page");
+        ret = -EFAULT;
+        goto out_unmap;
+    }
+
+    /* Copy page */
+    ret = write_page(fd, page, i);
+    if ( ret != 0 )
+    {
+        ERROR("Error copying page");
+        goto out_unmap;
+    }
+
+    /* Clear page */
+    memset(page, 0, PAGE_SIZE);
+
+    munmap(page, PAGE_SIZE);
+
+    /* Tell Xen to evict page */
+    ret = xc_mem_paging_evict(paging->xc_handle, paging->mem_event.domain_id,
+                              victim->gfn);
+    if ( ret != 0 )
+    {
+        ERROR("Error evicting page");
+        return ret;
+    }
+
+    /*
+     * Notify policy.
+     * NOTE(review): this is an eviction, yet the "paged in" hook is the one
+     * called here - confirm against policy.h that this is intended.
+     */
+    policy_notify_paged_in(paging->mem_event.domain_id, victim->gfn);
+
+    return 0;
+
+ out_unmap:
+    munmap(page, PAGE_SIZE);
+    return ret;
+}
+
+/*
+ * Finish handling a request: queue @rsp on the response ring, notify the
+ * policy that rsp->gfn was paged in, tell Xen the page is ready and signal
+ * the event channel.
+ *
+ * Returns 0 on success, or the first non-zero result from put_response(),
+ * xc_mem_paging_resume() or xc_evtchn_notify().  The event channel is only
+ * notified when xc_mem_paging_resume() succeeded.
+ */
+int xenpaging_resume_page(xenpaging_t *paging, mem_event_response_t *rsp)
+{
+    int ret;
+
+    /* Put the page info on the ring */
+    ret = put_response(&paging->mem_event, rsp);
+    if ( ret != 0 )
+        goto out;
+
+    /* Notify policy of page being paged in */
+    policy_notify_paged_in(paging->mem_event.domain_id, rsp->gfn);
+
+    /* Tell Xen page is ready; do not let a failure here go unnoticed */
+    ret = xc_mem_paging_resume(paging->xc_handle, paging->mem_event.domain_id,
+                               rsp->gfn);
+    if ( ret != 0 )
+    {
+        ERROR("Error resuming page in Xen");
+        goto out;
+    }
+
+    ret = xc_evtchn_notify(paging->mem_event.xce_handle,
+                           paging->mem_event.port);
+
+ out:
+    return ret;
+}
+
+/*
+ * Bring the frame *gfn back into the domain: ask Xen to prepare a page,
+ * map it, and fill it from slot @i of the paging file @fd via read_page().
+ *
+ * Returns 0 on success, the non-zero result of xc_mem_paging_prep() or
+ * read_page(), or -EFAULT when the frame cannot be mapped or is reported
+ * as invalid.  @gfn is handed to xc_map_foreign_batch() by pointer and is
+ * inspected again afterwards.
+ */
+int xenpaging_populate_page(xenpaging_t *paging, unsigned long *gfn, int fd,
+                            int i)
+{
+    void *mapping;
+    int rc;
+
+    /* Have Xen allocate a page for the domain */
+    rc = xc_mem_paging_prep(paging->xc_handle, paging->mem_event.domain_id,
+                            *gfn);
+    if ( rc != 0 )
+    {
+        ERROR("Error preparing for page in");
+        return rc;
+    }
+
+    /* Map the freshly prepared page */
+    mapping = xc_map_foreign_batch(paging->xc_handle,
+                                   paging->mem_event.domain_id,
+                                   PROT_READ | PROT_WRITE, gfn, 1);
+    if ( mapping == NULL )
+    {
+        ERROR("Error mapping page: page is null");
+        return -EFAULT;
+    }
+
+    if ( (*gfn & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB )
+    {
+        /* The frame was flagged as not present */
+        ERROR("Error mapping page: gfn is invalid");
+        rc = -EFAULT;
+    }
+    else
+    {
+        /* Restore the saved contents */
+        rc = read_page(fd, mapping, i);
+        if ( rc != 0 )
+            ERROR("Error reading page");
+    }
+
+    munmap(mapping, PAGE_SIZE);
+
+    return rc;
+}
+
+/*
+ * Keep asking the policy for a victim until one has been nominated and
+ * evicted successfully, storing it in @victim and its contents in slot @i
+ * of the paging file @fd.
+ *
+ * On the first failed nomination and on every 1000th one after that, the
+ * ioemu cache flush is requested.  Once a page has been evicted its bit in
+ * paging->bitmap is set.
+ *
+ * Returns 0 on success, or the non-zero result of policy_choose_victim().
+ */
+static int evict_victim(xenpaging_t *paging, domid_t domain_id,
+                        xenpaging_victim_t *victim, int fd, int i)
+{
+    int failed_nominations = 0;
+    int rc;
+
+    for ( ; ; )
+    {
+        rc = policy_choose_victim(paging, domain_id, victim);
+        if ( rc != 0 )
+        {
+            ERROR("Error choosing victim");
+            return rc;
+        }
+
+        rc = xc_mem_paging_nominate(paging->xc_handle,
+                                    paging->mem_event.domain_id, victim->gfn);
+        if ( rc != 0 )
+        {
+            /* Nomination refused: flush occasionally, then pick again */
+            if ( (failed_nominations++ % 1000 == 0) &&
+                 xc_mem_paging_flush_ioemu_cache(domain_id) )
+                ERROR("Error flushing ioemu cache");
+            continue;
+        }
+
+        if ( xenpaging_evict_page(paging, victim, fd, i) == 0 )
+            break;
+    }
+
+    if ( test_and_set_bit(victim->gfn, paging->bitmap) )
+        ERROR("Page has been evicted before");
+
+    return 0;
+}
+
+/*
+ * Usage: <program> <domain_id> <num_pages>
+ *
+ * Evicts num_pages pages of the given domain into the file
+ * "page_cache_<domain_id>" and then loops forever servicing requests from
+ * Xen: a page that is currently paged out is read back in, the vcpu is
+ * resumed and another page is evicted into the freed file slot.
+ *
+ * The service loop only ends on an error, so the process exit status is
+ * always non-zero.
+ */
+int main(int argc, char *argv[])
+{
+    domid_t domain_id;
+    int num_pages;
+    xenpaging_t *paging;
+    xenpaging_victim_t *victims;
+    mem_event_request_t req;
+    mem_event_response_t rsp;
+    char *end;
+    long value;
+    int i;
+    int rc;
+
+    int open_flags = O_CREAT | O_TRUNC | O_RDWR;
+    mode_t open_mode = S_IRUSR | S_IRGRP | S_IROTH |
+                       S_IWUSR | S_IWGRP | S_IWOTH;
+    char filename[80];
+    int fd;
+
+    /* Validate the command line before touching argv[1] and argv[2] */
+    if ( argc != 3 )
+    {
+        fprintf(stderr, "Usage: %s <domain_id> <num_pages>\n",
+                (argc > 0) ? argv[0] : "xenpaging");
+        return 1;
+    }
+
+    value = strtol(argv[1], &end, 10);
+    if ( end == argv[1] || *end != '\0' || value < 0 || value > 0xFFFF )
+    {
+        fprintf(stderr, "Invalid domain id '%s'\n", argv[1]);
+        return 1;
+    }
+    domain_id = value;
+
+    value = strtol(argv[2], &end, 10);
+    num_pages = (int)value;
+    if ( end == argv[2] || *end != '\0' || value <= 0 || num_pages != value )
+    {
+        fprintf(stderr, "Invalid number of pages '%s'\n", argv[2]);
+        return 1;
+    }
+
+    /* Heap-allocate (and zero) the victim table; its size is user input */
+    victims = calloc(num_pages, sizeof(*victims));
+    if ( victims == NULL )
+    {
+        perror("failed to allocate victim table");
+        return 1;
+    }
+
+    /* Open file */
+    snprintf(filename, sizeof(filename), "page_cache_%d", domain_id);
+    fd = open(filename, open_flags, open_mode);
+    if ( fd < 0 )
+    {
+        perror("failed to open file");
+        free(victims);
+        return 1;
+    }
+
+    /* Seed random-number generator */
+    srand(time(NULL));
+
+    /* Initialise domain paging */
+    paging = xenpaging_init(domain_id);
+    if ( paging == NULL )
+    {
+        /* Nothing to tear down: xenpaging_init() cleans up after itself */
+        ERROR("Error initialising paging");
+        free(victims);
+        return 1;
+    }
+
+    /* Evict pages */
+    for ( i = 0; i < num_pages; i++ )
+    {
+        if ( evict_victim(paging, domain_id, &victims[i], fd, i) != 0 )
+            ERROR("Error evicting page");
+        if ( i % 100 == 0 )
+            DPRINTF("%d pages evicted\n", i);
+    }
+
+    DPRINTF("pages evicted\n");
+
+    /* Swap pages in and out */
+    while ( 1 )
+    {
+        /* Wait for Xen to signal that a page needs paged in */
+        rc = xc_wait_for_event_or_timeout(paging->mem_event.xce_handle, 100);
+        if ( rc < -1 )
+        {
+            ERROR("Error getting event");
+            goto out;
+        }
+        else if ( rc != -1 )
+        {
+            DPRINTF("Got event from Xen\n");
+        }
+
+        while ( RING_HAS_UNCONSUMED_REQUESTS(&paging->mem_event.back_ring) )
+        {
+            rc = get_request(&paging->mem_event, &req);
+            if ( rc != 0 )
+            {
+                ERROR("Error getting request");
+                goto out;
+            }
+
+            /* Check if the page has already been paged in */
+            if ( test_and_clear_bit(req.gfn, paging->bitmap) )
+            {
+                /* Find where in the paging file to read from */
+                for ( i = 0; i < num_pages; i++ )
+                {
+                    if ( (victims[i].domain_id ==
+                          paging->mem_event.domain_id) &&
+                         (victims[i].gfn == req.gfn) )
+                        break;
+                }
+
+                if ( i >= num_pages )
+                {
+                    DPRINTF("Couldn't find page %lx\n", req.gfn);
+                    goto out;
+                }
+
+                /* Populate the page */
+                rc = xenpaging_populate_page(paging, &req.gfn, fd, i);
+                if ( rc != 0 )
+                {
+                    ERROR("Error populating page");
+                    goto out;
+                }
+
+                /* Prepare the response */
+                rsp.gfn = req.gfn;
+                rsp.p2mt = req.p2mt;
+                rsp.vcpu_id = req.vcpu_id;
+                rsp.flags = req.flags;
+
+                rc = xenpaging_resume_page(paging, &rsp);
+                if ( rc != 0 )
+                {
+                    ERROR("Error resuming page");
+                    goto out;
+                }
+
+                /* Evict a new page to replace the one we just paged in */
+                if ( evict_victim(paging, domain_id, &victims[i], fd, i) != 0 )
+                    ERROR("Error evicting replacement page");
+            }
+            else
+            {
+                DPRINTF("page already populated (domain = %d; vcpu = %d; gfn = %lx; paused = %ld)\n",
+                        paging->mem_event.domain_id, req.vcpu_id, req.gfn,
+                        req.flags & MEM_EVENT_FLAG_PAUSED);
+
+                /* Tell Xen to resume the vcpu */
+                /* XXX: Maybe just check if the vcpu was paused? */
+                if ( req.flags & MEM_EVENT_FLAG_PAUSED )
+                {
+                    /* Prepare the response */
+                    rsp.gfn = req.gfn;
+                    rsp.p2mt = req.p2mt;
+                    rsp.vcpu_id = req.vcpu_id;
+                    rsp.flags = req.flags;
+
+                    rc = xenpaging_resume_page(paging, &rsp);
+                    if ( rc != 0 )
+                    {
+                        ERROR("Error resuming");
+                        goto out;
+                    }
+                }
+            }
+        }
+    }
+
+ out:
+    /* Tear down domain paging; this label is only reached after an error */
+    rc = xenpaging_teardown(paging);
+    if ( rc != 0 )
+        ERROR("Error tearing down paging");
+
+    free(victims);
+
+    return 1;
+}
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End: 
+ */
diff -r 0b56bc4f3b21 -r 9b344d919ee4 tools/xenpaging/xenpaging.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/xenpaging/xenpaging.h       Thu Dec 17 06:27:55 2009 +0000
@@ -0,0 +1,72 @@
+/******************************************************************************
+ * tools/xenpaging/xenpaging.h
+ *
+ * Xen domain paging.
+ *
+ * Copyright (c) 2009 Citrix (R&D) Inc. (Patrick Colp)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#ifndef __XEN_PAGING2_H__
+#define __XEN_PAGING2_H__
+
+
+#include "spinlock.h"
+#include "xc.h"
+#include <xc_private.h>
+
+#include <xen/event_channel.h>
+#include <xen/mem_event.h>
+
+#include "mem_event.h"
+
+
+typedef struct xenpaging {             /* per-domain state of the paging tool */
+    int xc_handle;                     /* result of xc_interface_open() */
+
+    xc_platform_info_t *platform_info; /* malloc'd; filled by xc_get_platform_info() */
+    xc_domaininfo_t    *domain_info;   /* malloc'd; filled by xc_domain_getinfolist() */
+
+    unsigned long  bitmap_size;        /* size passed to alloc_bitmap() for 'bitmap' */
+    unsigned long *bitmap;             /* bit per gfn: set on eviction, cleared on page-in */
+
+    mem_event_t mem_event;             /* domain id, shared/ring pages, back ring, event channel */
+} xenpaging_t;
+
+
+typedef struct xenpaging_victim {
+    /* The domain to evict a page from */
+    domid_t domain_id;
+    /* The guest frame number (gfn) of the page to evict */
+    unsigned long gfn;
+    /* The mfn of the evicted page. NOTE(review): never written in xenpaging.c - confirm it is used */
+    unsigned long mfn;
+} xenpaging_victim_t;
+
+
+#endif // __XEN_PAGING2_H__
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>
  • [Xen-changelog] [xen-unstable] User-land tool for memory paging., Xen patchbot-unstable <=