[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC][PATCH 02/13] Kemari: core kemari code



This patch implements the core of Kemari.  If Kemari is turned on, VM
synchronization starts when an event is tapped.  First, Kemari pauses the guest
VM and locks the grant tables.  Then, it extracts the dirtied pfns from the
bitmap, copies them to the shared buffer, and notifies the userland tools via a
Xen event channel so that they can transfer the guest.

Signed-off-by: Yoshi Tamura <tamura.yoshiaki@xxxxxxxxxxxxx>
Signed-off-by: Yoshisato Yanagisawa <yanagisawa.yoshisato@xxxxxxxxxxxxx>
---
 xen/arch/x86/Makefile          |    1
 xen/arch/x86/domain.c          |    4
 xen/arch/x86/domctl.c          |   16
 xen/arch/x86/kemari/Makefile   |    1
 xen/arch/x86/kemari/kemari.c   |  666 +++++++++++++++++++++++++++++++++++++++++
 xen/include/public/domctl.h    |   33 ++
 xen/include/public/io/xenbus.h |    4
 xen/include/public/kemari.h    |   97 +++++
 xen/include/xen/kemari.h       |   75 ++++
 9 files changed, 896 insertions(+), 1 deletion(-)

diff -r 19201eebab16 xen/arch/x86/Makefile
--- a/xen/arch/x86/Makefile     Thu Sep 25 13:33:50 2008 +0100
+++ b/xen/arch/x86/Makefile     Wed Mar 04 17:04:27 2009 +0900
@@ -4,6 +4,7 @@
 subdir-y += hvm
 subdir-y += mm
 subdir-y += oprofile
+subdir-y += kemari

 subdir-$(x86_32) += x86_32
 subdir-$(x86_64) += x86_64
diff -r 19201eebab16 xen/arch/x86/kemari/Makefile
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/kemari/Makefile      Wed Mar 04 17:04:28 2009 +0900
@@ -0,0 +1,1 @@
+obj-y += kemari.o
diff -r 19201eebab16 xen/include/xen/kemari.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/xen/kemari.h  Wed Mar 04 17:04:30 2009 +0900
@@ -0,0 +1,75 @@
+/******************************************************************************
+ * kemari.h
+ *
+ * Kemari header file.
+ *
+ * Copyright (C) 2008 Nippon Telegraph and Telephone Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __XEN_KEMARI_H__
+#define __XEN_KEMARI_H__
+
+#include <public/domctl.h>
+
+#define NUM_KEMARI_TAPS 32
+
+#define _KEMARI_TAP_ATTACHED 0
+#define KEMARI_TAP_ATTACHED (1UL<<_KEMARI_TAP_ATTACHED)
+#define _KEMARI_TAP_DETACHED 1
+#define KEMARI_TAP_DETACHED (1UL<<_KEMARI_TAP_DETACHED)
+
+/* Per-port tap bookkeeping; struct kemari holds NUM_KEMARI_TAPS of these. */
+struct kemari_tap {
+    /* KEMARI_TAP_ATTACHED / KEMARI_TAP_DETACHED; zero until first attached. */
+    uint64_t status;
+    /* Counts taps handled by run_kemari() on the KEMARI_TAP_IN path. */
+    uint64_t in_events;
+    /* Counts taps handled by run_kemari() on the KEMARI_TAP_OUT path. */
+    uint64_t out_events;
+};
+
+/*
+ * Main data structure of Kemari.
+ *
+ * One instance hangs off a domain (d->kemari) while Kemari is enabled for it.
+ */
+struct kemari {
+    /* Domain this instance belongs to. */
+    struct domain      *domain;
+
+    /* Xenheap-allocated ring shared with the privileged tools domain. */
+    struct kemari_ring *ring;
+
+    /* Xen event channel port used to notify the tools; 0 means "none". */
+    uint32_t           port;
+
+    /* Number of pages backing the ring (a power of two). */
+    uint32_t           num_pages;
+
+    /* Machine frame number of the first ring page. */
+    uint64_t           mfn;
+
+    /* NOTE(review): not referenced by the hypervisor code in this patch. */
+    uint64_t           num_events;
+
+    /* NOTE(review): not referenced by the hypervisor code in this patch. */
+    uint64_t           priv_dirty_pages;
+
+    /* Tap state, indexed by event channel port. */
+    struct kemari_tap  taps[NUM_KEMARI_TAPS];
+};
+
+long kemari_off(struct domain *d);
+
+/* Entry point to Kemari */
+long do_kemari_op(struct domain *d, struct xen_domctl_kemari_op *kemari_op);
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 19201eebab16 xen/arch/x86/kemari/kemari.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/arch/x86/kemari/kemari.c      Wed Mar 04 17:04:28 2009 +0900
@@ -0,0 +1,666 @@
+/******************************************************************************
+ * kemari.c
+ *
+ * The hypervisor part of VM synchronization mechanism (Kemari).
+ *
+ * Copyright (c) 2008 Nippon Telegraph and Telephone Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * Copied log_dirty_lock(_d), log_dirty_unlock(_d) and paging_log_dirty_op()
+ * from arch/x86/paging.c.
+ *
+ * x86 specific paging support
+ * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
+ * Copyright (c) 2007 XenSource Inc.
+ */
+
+#include <xen/config.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/kemari.h>
+#include <xen/mm.h>
+#include <xen/domain.h>
+
+#include <public/kemari.h>
+#include <asm/domain.h>
+#include <asm/hvm/support.h>
+#include <asm/page.h>
+#include <asm/paging.h>
+#include <asm/shadow.h>
+#include <asm/types.h>
+
+/*
+ * Acquire the log-dirty spinlock of domain _d and record the owning CPU and
+ * calling function.  Hits BUG() if the current CPU is already recorded as
+ * the holder.  Copied from the x86 paging code (see the file header).
+ */
+#define log_dirty_lock(_d)                                                   \
+    do {                                                                     \
+        if (unlikely((_d)->arch.paging.log_dirty.locker==current->processor))\
+        {                                                                    \
+            printk("Error: paging log dirty lock held by %s\n",              \
+                   (_d)->arch.paging.log_dirty.locker_function);             \
+            BUG();                                                           \
+        }                                                                    \
+        spin_lock(&(_d)->arch.paging.log_dirty.lock);                        \
+        ASSERT((_d)->arch.paging.log_dirty.locker == -1);                    \
+        (_d)->arch.paging.log_dirty.locker = current->processor;             \
+        (_d)->arch.paging.log_dirty.locker_function = __func__;              \
+    } while (0)
+
+/*
+ * Release the log-dirty spinlock of domain _d.  Asserts that the current CPU
+ * is the recorded holder, then resets the owner bookkeeping before unlocking.
+ */
+#define log_dirty_unlock(_d)                                              \
+    do {                                                                  \
+        ASSERT((_d)->arch.paging.log_dirty.locker == current->processor); \
+        (_d)->arch.paging.log_dirty.locker = -1;                          \
+        (_d)->arch.paging.log_dirty.locker_function = "nobody";           \
+        spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
+    } while (0)
+
+/*
+ * Event channel lookup helpers: map a port number of domain d to its bucket
+ * and to the struct evtchn inside that bucket.
+ * NOTE(review): presumably duplicated from the common event channel code --
+ * confirm and consider sharing a single definition.
+ * NOTE(review): "(p) >= 0" is always true when p is unsigned, as it is for
+ * the uint32_t port passed in from kemari_off().
+ */
+#define bucket_from_port(d,p) \
+    ((d)->evtchn[(p)/EVTCHNS_PER_BUCKET])
+#define port_is_valid(d,p)    \
+    (((p) >= 0) && ((p) < MAX_EVTCHNS(d)) && \
+     (bucket_from_port(d,p) != NULL))
+#define evtchn_from_port(d,p) \
+    (&(bucket_from_port(d,p))[(p)&(EVTCHNS_PER_BUCKET-1)])
+
+/*
+ * Save the HVM context of domain @d into the context area of @ring.
+ *
+ * The destination is the region described by ring->hvm_ctxt (buf_offset bytes
+ * from the start of the ring, buf_size bytes long).  On success
+ * ring->hvm_ctxt.rec_size holds the number of bytes written; if the domain is
+ * not paused by the controller or hvm_save() fails, rec_size is left at 0 so
+ * that a consumer never picks up a partial record.
+ */
+static void kemari_send_domaininfo_ctxt(struct kemari_ring *ring,
+                                        struct domain *d)
+{
+    struct hvm_domain_context ctxt;
+
+    /* Never advertise a stale or partial record if we bail out below. */
+    ring->hvm_ctxt.rec_size = 0;
+
+    if ( !d->is_paused_by_controller )
+    {
+        dprintk(XENLOG_ERR, "Domain isn't paused\n");
+        return;
+    }
+
+    ctxt.cur = 0;
+    ctxt.size = ring->hvm_ctxt.buf_size;
+    ctxt.data = (uint8_t *)ring + ring->hvm_ctxt.buf_offset;
+
+    if ( hvm_save(d, &ctxt) != 0 )
+    {
+        dprintk(XENLOG_ERR, "hvm_save failed\n");
+        return;
+    }
+
+    ring->hvm_ctxt.rec_size = ctxt.cur;
+}
+
+/*
+ * Push the non-zero words of one dirty-bitmap leaf page into @ring.
+ *
+ * @dirty_bitmap is scanned as bytes / BYTES_PER_LONG words.  For every run of
+ * consecutive non-zero words [i, j) an index entry is produced first
+ * (start = i + index, end = j + index, in units of bitmap words), followed by
+ * one entry per word of the run.
+ *
+ * @d is not used.  Returns the final scan position, which is never negative.
+ *
+ * NOTE(review): nothing here compares ring->prod against ring->cons or
+ * ring->num_ents, so a sufficiently dirty bitmap can wrap the ring and
+ * overwrite unconsumed entries -- confirm the ring is sized for the worst case.
+ * NOTE(review): i, j and index are 16-bit; offsets beyond 65535 bitmap words
+ * wrap around.
+ */
+static long kemari_send_dirty_bitmap_page(struct kemari_ring *ring,
+                                          struct domain *d,
+                                          unsigned long *dirty_bitmap,
+                                          uint16_t index, unsigned int bytes)
+{
+    uint16_t i, j;
+    struct kemari_ent *buf;
+
+    for ( i = 0; i < bytes / BYTES_PER_LONG; i++ )
+    {
+        j = i;
+
+        /* Find the end of the run of non-zero words starting at i. */
+        while ( (j < bytes / BYTES_PER_LONG) && (dirty_bitmap[j] != 0) )
+            j++;
+
+        /* Word i is clean: nothing to send. */
+        if ( i == j )
+            continue;
+
+        /* Index entry describing the run; publish it before the payload. */
+        buf = KEMARI_RING_GET_PROD(ring);
+        buf->u.index.start = i + index;
+        buf->u.index.end = j + index;
+        wmb();
+        ring->prod++;
+
+        /*
+         * Copy the run word by word.  This leaves i == j; the for-loop
+         * increment then skips word j, which is either zero or past the end.
+         */
+        while( i < j )
+        {
+            buf = (struct kemari_ent *)&dirty_bitmap[i];
+            kemari_ring_write(ring, buf);
+            i++;
+        }
+    }
+    return i;
+}
+
+/*
+ * Based on paging_log_dirty_op() in xen/arch/x86/mm/paging.c.
+ *
+ * Walk the log-dirty bitmap trie of domain @d under the log-dirty lock, push
+ * every leaf page into @ring via kemari_send_dirty_bitmap_page(), and clear
+ * the leaf afterwards.  On success the paging-mode specific
+ * clean_dirty_bitmap() hook is invoked after the lock has been dropped.
+ *
+ * Returns 0 on success, -EINVAL if the domain has no log-dirty bitmap,
+ * -ENOMEM if bitmap allocations failed earlier, or -EFAULT if a leaf page
+ * could not be sent.  All domain-page mappings are released on every path.
+ */
+static long kemari_send_dirty_bitmap(struct kemari_ring *ring,
+                                     struct domain *d)
+{
+    long ret = 0, clean = 1, peek = 1;
+    unsigned long pages = 0;
+    unsigned long p2m_size;
+    mfn_t *l4, *l3, *l2;
+    unsigned long *l1;
+    int i4, i3, i2;
+    uint16_t index = 0;
+
+    log_dirty_lock(d);
+
+    if ( clean )
+    {
+        d->arch.paging.log_dirty.fault_count = 0;
+        d->arch.paging.log_dirty.dirty_count = 0;
+    }
+
+    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
+    {
+        ret = -EINVAL; /* perhaps should be ENOMEM? */
+        goto out;
+    }
+
+    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
+        printk("%s: %d failed page allocs while logging dirty pages\n",
+               __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    pages = 0;
+    l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+
+    p2m_size = domain_get_maximum_gpfn(d) + 1;
+
+    for ( i4 = 0;
+          (pages < p2m_size) && (i4 < LOGDIRTY_NODE_ENTRIES);
+          i4++ )
+    {
+        l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+        for ( i3 = 0;
+              (pages < p2m_size) && (i3 < LOGDIRTY_NODE_ENTRIES);
+              i3++ )
+        {
+            l2 = ((l3 && mfn_valid(l3[i3])) ?
+                  map_domain_page(mfn_x(l3[i3])) : NULL);
+            for ( i2 = 0;
+                  (pages < p2m_size) && (i2 < LOGDIRTY_NODE_ENTRIES);
+                  i2++ )
+            {
+                unsigned int bytes = PAGE_SIZE;
+                l1 = ((l2 && mfn_valid(l2[i2])) ?
+                      map_domain_page(mfn_x(l2[i2])) : NULL);
+                /* Last leaf: only cover what is left of the p2m. */
+                if ( unlikely(((p2m_size - pages + 7) >> 3) < bytes) )
+                    bytes = (unsigned int)((p2m_size - pages +
+                                            BITS_PER_LONG - 1) >> 3);
+                if ( likely(peek) )
+                {
+                    if ( l1 != NULL &&
+                         kemari_send_dirty_bitmap_page(ring, d, l1,
+                                                       index, bytes) < 0 )
+                    {
+                        ret = -EFAULT;
+                        dprintk(XENLOG_ERR,
+                                "%s: kemari_send_dirty_bitmap_page\n",
+                                __FUNCTION__);
+                        /*
+                         * l1 is mapped here, which implies l2 and l3 are
+                         * mapped as well.  Drop every mapping before bailing
+                         * out so that the error path does not leak them.
+                         */
+                        unmap_domain_page(l1);
+                        unmap_domain_page(l2);
+                        unmap_domain_page(l3);
+                        unmap_domain_page(l4);
+                        goto out;
+                    }
+                }
+                /* Each leaf page advances the word index by a full page. */
+                index += PAGE_SIZE / BYTES_PER_LONG;
+
+                if ( clean && l1 != NULL )
+                    clear_page(l1);
+                pages += bytes << 3;
+                if ( l1 != NULL )
+                    unmap_domain_page(l1);
+            }
+            if ( l2 )
+                unmap_domain_page(l2);
+        }
+        if ( l3 )
+            unmap_domain_page(l3);
+    }
+    unmap_domain_page(l4);
+
+    log_dirty_unlock(d);
+
+    if ( clean )
+    {
+        /* We need to further call clean_dirty_bitmap() functions of specific
+         * paging modes (shadow or hap).  Safe because the domain is paused. */
+        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+    }
+
+    return ret;
+
+ out:
+    log_dirty_unlock(d);
+
+    return ret;
+}
+
+/* Signal the Kemari event channel of @kemari; a NULL @kemari is ignored. */
+static void kemari_guest_notify(struct kemari *kemari)
+{
+    if ( unlikely(kemari == NULL) )
+        return;
+
+    notify_via_xen_evtchn_tap(kemari->domain, kemari->port);
+}
+
+/*
+ * VM synchronization entry point.
+ *
+ * Installed as the redirect handler of a tapped event channel (see
+ * kemari_bind_tap()).  Depending on which side is tapped, it pauses the
+ * protected domain, takes its grant table lock, pushes the dirty bitmap into
+ * the Kemari ring and notifies the tools through the Kemari event channel.
+ *
+ * Returns 0 when no tap applies or synchronization was kicked off, or the
+ * negative error of kemari_send_dirty_bitmap().
+ *
+ * NOTE(review): the domain is paused here and not unpaused in this function,
+ * including on the error path -- presumably the tools unpause it once the
+ * transfer is done; confirm.
+ * NOTE(review): d->kemari is dereferenced without a NULL check, and port is
+ * used to index kemari->taps[] (NUM_KEMARI_TAPS entries) without a range
+ * check.
+ */
+static long run_kemari(struct evtchn *lchn, struct evtchn *rchn)
+{
+    long ret;
+    uint32_t port;
+    uint64_t *events;
+    struct domain *d, *rd = lchn->u.interdomain.remote_dom;
+    struct kemari *kemari;
+    struct kemari_ring *ring;
+    struct evtchn *kemari_evtchn;
+
+    if (lchn->tap.mode & KEMARI_TAP_OUT)
+    {
+        /* Outgoing event: the sender (current domain) is the one to sync. */
+        domain_pause_for_debugger();
+        d = current->domain;
+        kemari = d->kemari;
+        port = rchn->u.interdomain.remote_port;
+        events = &kemari->taps[port].out_events;
+    }
+    else if (rchn->tap.mode & KEMARI_TAP_IN)
+    {
+        /* Incoming event: the remote (receiving) domain is the one to sync. */
+        domain_pause_by_systemcontroller(rd);
+        d = rd;
+        kemari = rd->kemari;
+        port = lchn->u.interdomain.remote_port;
+        events = &kemari->taps[port].in_events;
+    }
+    else
+    {
+        /* Neither side is tapped: nothing to do. */
+        ret = 0;
+        goto out;
+    }
+
+    spin_lock(&d->grant_table->lock);
+
+    ++*events;
+
+    /* Route the Kemari event channel to the vcpu that triggered the tap. */
+    kemari_evtchn = evtchn_from_port(d, kemari->port);
+    if (kemari_evtchn->notify_vcpu_id != current->vcpu_id)
+        kemari_evtchn->notify_vcpu_id = current->vcpu_id;
+
+    ring = kemari->ring;
+
+    ret = kemari_send_dirty_bitmap(ring, d);
+    if ( ret < 0 )
+        goto unlock_out;
+
+    kemari_guest_notify(kemari);
+
+    prepare_wait_on_xen_event_channel(kemari->port);
+
+    /* NOTE(review): presumably undoes a flag set by the call above -- confirm. */
+    test_and_clear_bit(_VPF_blocked_in_xen, &current->pause_flags);
+
+    ret = 0;
+
+ unlock_out:
+    spin_unlock(&d->grant_table->lock);
+
+ out:
+    return ret;
+}
+
+/*
+ * Bind a tap to the port named in the attach request of @kemari_op, with
+ * run_kemari() as the redirect handler.  Returns evtchn_bind_tap()'s result.
+ */
+static long kemari_bind_tap(struct domain *d,
+                            struct xen_domctl_kemari_op *kemari_op)
+{
+    struct evtchn_bind_tap req;
+
+    req.tap_dom  = d->domain_id;
+    req.tap_port = kemari_op->u.attach.port;
+    req.mode     = kemari_op->u.attach.evtchn_tap_mode;
+    req.redirect = run_kemari;
+
+    return evtchn_bind_tap(&req);
+}
+
+/*
+ * Remove the tap from the port named in the detach request of @kemari_op.
+ * Returns evtchn_unbind_tap()'s result.
+ */
+static long kemari_unbind_tap(struct domain *d,
+                              struct xen_domctl_kemari_op *kemari_op)
+{
+    struct evtchn_bind_tap req;
+
+    req.tap_dom  = d->domain_id;
+    req.tap_port = kemari_op->u.detach.port;
+    req.mode     = KEMARI_TAP_OFF;
+
+    return evtchn_unbind_tap(&req);
+}
+
+/*
+ * Attach a Kemari tap to the event channel port given in @kemari_op.
+ *
+ * Returns 0 on success, -EINVAL if Kemari is not enabled for @d or the port
+ * does not fit the tap table, or the error returned by kemari_bind_tap().
+ */
+static long kemari_attach(struct domain *d,
+                          struct xen_domctl_kemari_op *kemari_op)
+{
+    long ret;
+    uint32_t port = kemari_op->u.attach.port;
+    struct kemari *kemari = d->kemari;
+    struct kemari_tap *tap;
+
+    dprintk(XENLOG_DEBUG, "%s: in\n", __FUNCTION__);
+
+    ret = -EINVAL;
+    if ( unlikely(kemari == NULL) )
+    {
+        dprintk(XENLOG_ERR, "kemari is off\n");
+        goto out;
+    }
+
+    /* The port comes from the caller and indexes taps[]: reject overruns. */
+    if ( unlikely(port >= NUM_KEMARI_TAPS) )
+    {
+        dprintk(XENLOG_ERR, "tap port=%u out of range\n", port);
+        goto out;
+    }
+
+    dprintk(XENLOG_DEBUG, "%s: kemari_bind_tap\n", __FUNCTION__);
+    ret =  kemari_bind_tap(d, kemari_op);
+    if (ret < 0)
+    {
+        dprintk(XENLOG_ERR,
+                "couldn't bind evtchn tap port=%u\n", port);
+        goto out;
+    }
+
+    tap = &kemari->taps[port];
+
+    tap->status = KEMARI_TAP_ATTACHED;
+
+ out:
+    dprintk(XENLOG_DEBUG, "%s: out\n", __FUNCTION__);
+    return ret;
+}
+
+/*
+ * Detach the Kemari tap from the event channel port given in @kemari_op.
+ *
+ * Returns 0 on success, -EINVAL if Kemari is not enabled for @d, the port
+ * does not fit the tap table or is not currently attached, or the error
+ * returned by kemari_unbind_tap().
+ */
+static long kemari_detach(struct domain *d,
+                          struct xen_domctl_kemari_op *kemari_op)
+{
+    long ret;
+    uint32_t port = kemari_op->u.detach.port;
+    struct kemari *kemari = d->kemari;
+    struct kemari_tap *tap;
+
+    ret = -EINVAL;
+    if ( unlikely(kemari == NULL) )
+    {
+        dprintk(XENLOG_ERR, "kemari is off\n");
+        goto out;
+    }
+
+    /* The port comes from the caller and indexes taps[]: reject overruns. */
+    if ( unlikely(port >= NUM_KEMARI_TAPS) )
+    {
+        dprintk(XENLOG_ERR, "tap port=%u out of range\n", port);
+        goto out;
+    }
+
+    /* Only look the tap up once kemari and port are known to be valid. */
+    tap = &kemari->taps[port];
+    if ( unlikely(tap->status != KEMARI_TAP_ATTACHED) )
+        goto out;
+
+    ret =  kemari_unbind_tap(d, kemari_op);
+    if (ret < 0)
+        goto out;
+
+    tap->status = KEMARI_TAP_DETACHED;
+
+ out:
+    return ret;
+}
+
+/* Share every page backing the Kemari ring, writable, with privileged guests. */
+static void share_kemari_page_with_privileged_guests(struct kemari *kemari)
+{
+    int idx = 0;
+
+    while ( idx < kemari->num_pages )
+    {
+        share_xen_page_with_privileged_guests(
+            virt_to_page(kemari->ring) + idx, XENSHARE_writable);
+        idx++;
+    }
+}
+
+/*
+ * Undo share_kemari_page_with_privileged_guests(): for each ring page, clear
+ * the allocated flag and drop the reference that went with it.
+ */
+static void unshare_kemari_page_with_privileged_guests(struct kemari *kemari)
+{
+    int idx;
+    struct page_info *pg;
+
+    for ( idx = 0; idx < kemari->num_pages; idx++ )
+    {
+        pg = mfn_to_page(kemari->mfn + idx);
+
+        /* Ring pages must still be owned by Xen. */
+        BUG_ON(page_get_owner(pg) != dom_xen);
+
+        if ( !test_and_clear_bit(_PGC_allocated, &pg->count_info) )
+            continue;
+
+        put_page(pg);
+    }
+}
+
+/*
+ * Tear down the ring of d->kemari: free its event channel, unshare and free
+ * the ring pages, and reset the ring bookkeeping.  Does nothing if no ring,
+ * no pages or no port is recorded.
+ */
+static void kemari_free_ring(struct domain *d)
+{
+    struct kemari *kemari = d->kemari;
+    struct vcpu *v = d->vcpu[0];
+
+    if ( kemari->ring == NULL )
+        return;
+    if ( kemari->num_pages == 0 )
+        return;
+    if ( kemari->port == 0 )
+        return;
+
+    free_xen_event_channel(v, kemari->port);
+
+    unshare_kemari_page_with_privileged_guests(kemari);
+
+    free_xenheap_pages(kemari->ring,
+                       get_order_from_pages(kemari->num_pages));
+
+    kemari->port = 0;
+    kemari->num_pages = 0;
+    kemari->ring = NULL;
+    kemari->mfn = 0;
+}
+
+/*
+ * Allocate the Kemari event channel and shared ring for domain @d and record
+ * them in @kemari.
+ *
+ * The ring is laid out as: struct kemari_ring header, entry array, and the
+ * HVM context buffer occupying the last hvm_buf_size bytes.  The pages are
+ * shared writable with privileged guests.
+ *
+ * Returns 0 on success, the error from alloc_unbound_xen_event_channel(),
+ * -EINVAL if the domain has no log-dirty bitmap, or -ENOMEM if the ring pages
+ * cannot be allocated.  On failure no event channel is left allocated.
+ *
+ * NOTE(review): dirty_bitmap_size is in bytes, yet only dirty_bitmap_size >> 3
+ * bytes of entry space are budgeted -- confirm this is intended, since
+ * kemari_send_dirty_bitmap_page() does not check for ring overrun.
+ */
+static long kemari_alloc_ring(struct domain *d, struct kemari *kemari)
+{
+    long ret;
+    unsigned int order;
+    unsigned long num_pages;
+    domid_t current_domid = current->domain->domain_id;
+    struct vcpu *v = d->vcpu[0];
+    struct kemari_ring *ring;
+    unsigned long dirty_bitmap_size;
+    uint32_t hvm_buf_size;
+
+    ret = alloc_unbound_xen_event_channel(v, current_domid);
+    if ( ret < 0 )
+    {
+        dprintk(XENLOG_ERR, "couldn't alloc xen_event_channel\n");
+        goto out;
+    }
+    kemari->port = ret;
+
+    dirty_bitmap_size = (BITS_TO_LONGS(domain_get_maximum_gpfn(d) + 1)
+                         * sizeof(unsigned long));
+
+    ret = -EINVAL;
+    if ( dirty_bitmap_size == 0 || !mfn_valid(d->arch.paging.log_dirty.top) )
+    {
+        dprintk(XENLOG_ERR, "dirty_bitmap is EMPTY\n");
+        goto out_evtchn;
+    }
+
+    hvm_buf_size = hvm_save_size(d);
+    num_pages = (sizeof(struct kemari_ring)
+                 + hvm_buf_size
+                 + (dirty_bitmap_size >> 3)
+                 + PAGE_SIZE - 1) / PAGE_SIZE;
+    /* Xenheap allocations are by order: round up to a power of two. */
+    order = get_order_from_pages(num_pages);
+    num_pages = (1UL << order);
+
+    /* sizeof yields size_t; cast so the argument matches %lu everywhere. */
+    dprintk(XENLOG_DEBUG, "ring=%lu, bitmap=%lu, ctxt=%u, PAGE=%ld\n",
+            (unsigned long)sizeof(struct kemari_ring), dirty_bitmap_size / 8,
+            hvm_buf_size, PAGE_SIZE);
+
+    ret = -ENOMEM;
+    ring = alloc_xenheap_pages(order);
+    if ( ring == NULL )
+    {
+        dprintk(XENLOG_ERR, "couldn't alloc xenheap_pages\n");
+        goto out_evtchn;
+    }
+    memset(ring, 0, PAGE_SIZE * num_pages);
+
+    /* Entries fill the space between the header and the HVM context. */
+    ring->num_ents =
+        (PAGE_SIZE * num_pages - hvm_buf_size + (long)ring - (long)ring->data)
+        / sizeof(struct kemari_ent);
+    ring->hvm_ctxt.buf_size = hvm_buf_size;
+    ring->hvm_ctxt.buf_offset = PAGE_SIZE * num_pages - hvm_buf_size;
+
+    kemari->num_pages = num_pages;
+    kemari->mfn = virt_to_mfn(ring);
+    kemari->ring = ring;
+
+    share_kemari_page_with_privileged_guests(kemari);
+
+    dprintk(XENLOG_DEBUG, "num_ents=%u, num_pages=%u\n",
+            ring->num_ents, kemari->num_pages);
+
+    return 0;
+
+ out_evtchn:
+    free_xen_event_channel(v, kemari->port);
+    /* Do not leave a freed port number behind in the caller's structure. */
+    kemari->port = 0;
+ out:
+    return ret;
+}
+
+/*
+ * Enable Kemari for domain @d.
+ *
+ * Allocates the per-domain state and ring while the domain is paused, reports
+ * the event channel port, first ring mfn and page count back through
+ * @kemari_op->u.enable, and saves the initial HVM context into the ring.
+ *
+ * Returns 0 on success, -EBUSY if Kemari is already enabled, -ENOMEM if the
+ * state cannot be allocated, or the error from kemari_alloc_ring().
+ */
+static long kemari_enable(struct domain *d,
+                          struct xen_domctl_kemari_op *kemari_op)
+{
+    long ret;
+    struct kemari *kemari;
+
+    ret = -EBUSY;
+    if ( unlikely(d->kemari != NULL) )
+    {
+        dprintk(XENLOG_ERR, "kemari already enabled\n");
+        goto out;
+    }
+
+    ret = -ENOMEM;
+    kemari = xmalloc_bytes(sizeof(struct kemari));
+    if ( kemari == NULL )
+    {
+        dprintk(XENLOG_ERR, "couldn't alloc kemari\n");
+        goto out;
+    }
+
+    memset(kemari, 0, sizeof(struct kemari));
+
+    domain_pause_by_systemcontroller(d);
+
+    ret = kemari_alloc_ring(d, kemari);
+    if ( ret < 0 )
+        goto kemari_free;
+
+    kemari_op->u.enable.port = kemari->port;
+    kemari_op->u.enable.mfn = kemari->mfn;
+    kemari_op->u.enable.num_pages = kemari->num_pages;
+
+    /* uint64_t is not unsigned long long on every build: cast for %llu. */
+    dprintk(XENLOG_DEBUG, "port=%u, mfn=%llu\n",
+            kemari->port, (unsigned long long)kemari->mfn);
+
+    kemari->domain = d;
+
+    d->kemari = kemari;
+
+    kemari_send_domaininfo_ctxt(kemari->ring, d);
+
+    domain_unpause_by_systemcontroller(d);
+
+    dprintk(XENLOG_DEBUG, "kemari enabled\n");
+    return 0;
+
+ kemari_free:
+    xfree(kemari);
+    domain_unpause_by_systemcontroller(d);
+ out:
+    return ret;
+}
+
+/*
+ * Turn Kemari off for domain @d.
+ *
+ * With the domain paused, unbinds every tap still marked attached, frees the
+ * ring and event channel, and releases the per-domain state.
+ *
+ * Returns 0 on success or -EINVAL if Kemari is not enabled for @d.
+ */
+long kemari_off(struct domain *d)
+{
+    uint32_t port;
+    struct kemari *kemari = d->kemari;
+    struct kemari_tap *tap;
+    /* Named so as not to shadow the static function kemari_unbind_tap(). */
+    struct evtchn_bind_tap unbind_req;
+
+    if ( unlikely(kemari == NULL) )
+    {
+        dprintk(XENLOG_ERR, "kemari already off\n");
+        return -EINVAL;
+    }
+
+    domain_pause_by_systemcontroller(d);
+
+    unbind_req.tap_dom = d->domain_id;
+    /* Same request kemari_unbind_tap() builds; mode must not be left unset. */
+    unbind_req.mode = KEMARI_TAP_OFF;
+
+    for ( port = 0; port < NUM_KEMARI_TAPS; port++ ) {
+        tap = &kemari->taps[port];
+
+        if ( (tap->status != KEMARI_TAP_ATTACHED) ||
+             (!port_is_valid(d, port)) )
+            continue;
+
+        unbind_req.tap_port = port;
+
+        if ( evtchn_unbind_tap(&unbind_req) < 0 )
+            dprintk(XENLOG_ERR,
+                    "couldn't unbind evtchn tap port=%u\n", port);
+    }
+
+    /* kemari_free_ring() reads d->kemari, so it must run before the reset. */
+    if ( kemari->ring )
+        kemari_free_ring(d);
+
+    /* Unpublish the pointer before freeing so it never dangles. */
+    d->kemari = NULL;
+
+    xfree(kemari);
+
+    domain_unpause_by_systemcontroller(d);
+
+    return 0;
+}
+
+/*
+ * Entry point for XEN_DOMCTL_kemari_op: dispatch @kemari_op->cmd for domain
+ * @d.  All operations are serialized by a single function-local spinlock.
+ *
+ * Returns the result of the selected operation, or -EINVAL if @d is the
+ * calling domain or dom0, or if the command is unknown.
+ */
+long do_kemari_op(struct domain *d, struct xen_domctl_kemari_op *kemari_op)
+{
+    static DEFINE_SPINLOCK(lock);
+    long ret;
+
+    /* We don't support calling kemari by itself or dom0. */
+    if ( d == current->domain || d == dom0 )
+    {
+        dprintk(XENLOG_ERR, "can't attach kemari by itself or to dom0\n");
+        return -EINVAL;
+    }
+
+    spin_lock(&lock);
+
+    switch ( kemari_op->cmd )
+    {
+    case XEN_KEMARI_OP_enable:
+        ret = kemari_enable(d, kemari_op);
+        break;
+
+    case XEN_KEMARI_OP_off:
+        ret = kemari_off(d);
+        break;
+
+    case XEN_KEMARI_OP_attach:
+        ret = kemari_attach(d, kemari_op);
+        break;
+
+    case XEN_KEMARI_OP_detach:
+        ret = kemari_detach(d, kemari_op);
+        break;
+
+    default:
+        ret = -EINVAL;
+        break;
+    }
+
+    spin_unlock(&lock);
+
+    return ret;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 19201eebab16 xen/include/public/kemari.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/kemari.h       Wed Mar 04 17:04:29 2009 +0900
@@ -0,0 +1,97 @@
+/******************************************************************************
+ * kemari.h
+ *
+ * Tools interface to Kemari.
+ *
+ * Copyright (c) 2008 Nippon Telegraph and Telephone Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __XEN_PUBLIC_KEMARI_H__
+#define __XEN_PUBLIC_KEMARI_H__
+
+#define KEMARI_TAP_OFF 0
+#define KEMARI_TAP_IN  1
+#define KEMARI_TAP_OUT 2
+
+/*
+ * Header of the ring shared between the hypervisor and the tools.  The entry
+ * array starts at data[]; the HVM context buffer lives in the same allocation
+ * at hvm_ctxt.buf_offset.
+ */
+struct kemari_ring {
+    uint32_t cons;      /* consumer counter; reduced modulo num_ents on use */
+    uint32_t prod;      /* producer counter; reduced modulo num_ents on use */
+    uint32_t num_ents;  /* number of struct kemari_ent slots in data[] */
+    unsigned int dirty_bitmap_size; /* num of dirty bits */
+    struct {
+        uint32_t buf_size;    /* capacity of the HVM context buffer, bytes */
+        uint32_t rec_size;    /* bytes of HVM context actually saved */
+        uint32_t  buf_offset; /* byte offset of the buffer from the ring start */
+    } hvm_ctxt;
+    char     data[1];   /* first byte of the entry array */
+};
+
+/*
+ * One ring slot.  Which union member is valid depends on the position in the
+ * stream: the hypervisor emits an index entry followed by the dirty bitmap
+ * words of the run it describes.
+ */
+struct kemari_ent {
+    union {
+        /* NOTE(review): not produced by the hypervisor code in this patch. */
+        struct {
+            uint16_t pages;
+            uint16_t port;
+        } header;
+        /* Run of non-zero bitmap words: [start, end), in units of words. */
+        struct {
+            uint16_t start;
+            uint16_t end;
+        } index;
+        /* One word of the dirty bitmap. */
+        unsigned long dirty_bitmap;
+    } u;
+};
+
+/* Pointer to the slot the producer will fill next (prod modulo num_ents). */
+#define KEMARI_RING_GET_PROD(_ring) \
+    (&((struct kemari_ent *)(_ring)->data)[(_ring)->prod % (_ring)->num_ents])
+
+/* Pointer to the slot the consumer will read next (cons modulo num_ents). */
+#define KEMARI_RING_GET_CONS(_ring) \
+    (&((struct kemari_ent *)(_ring)->data)[(_ring)->cons % (_ring)->num_ents])
+
+/*
+ * Consume one entry: store a pointer to the current consumer slot in *buf and
+ * advance ring->cons.  The entry is not copied; *buf points into the ring.
+ *
+ * NOTE(review): cons is advanced before the caller has read the entry, and
+ * only a write barrier is issued -- confirm that the producer cannot reuse
+ * the slot while the caller is still reading it.
+ */
+static inline void kemari_ring_read(struct kemari_ring *ring,
+                                    struct kemari_ent **buf)
+{
+    *buf = KEMARI_RING_GET_CONS(ring);
+    /* Use defined() so an unset __XEN_TOOLS__ is not evaluated as a value. */
+#if defined(__XEN__)
+    wmb();
+#elif defined(__XEN_TOOLS__)
+    xen_wmb();
+#endif
+    ring->cons++;
+}
+
+/*
+ * Produce one entry: copy *buf into the current producer slot, issue a write
+ * barrier so the payload is visible first, then advance ring->prod.
+ *
+ * No check is made for free space; the caller must ensure the slot is free.
+ */
+static inline void kemari_ring_write(struct kemari_ring *ring,
+                                     struct kemari_ent *buf)
+{
+    memcpy(KEMARI_RING_GET_PROD(ring), buf, sizeof(struct kemari_ent));
+    /* Use defined() so an unset __XEN_TOOLS__ is not evaluated as a value. */
+#if defined(__XEN__)
+    wmb();
+#elif defined(__XEN_TOOLS__)
+    xen_wmb();
+#endif
+    ring->prod++;
+}
+
+#endif /* __XEN_PUBLIC_KEMARI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r 19201eebab16 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Thu Sep 25 13:33:50 2008 +0100
+++ b/xen/arch/x86/domain.c     Wed Mar 04 17:04:27 2009 +0900
@@ -1821,6 +1821,10 @@
     /* Free page used by xen oprofile buffer. */
     free_xenoprof_pages(d);

+    /* Turn off Kemari. */
+    if ( d->kemari )
+        kemari_off(d);
+
     if ( is_hvm_domain(d) )
         hvm_domain_relinquish_resources(d);

diff -r 19201eebab16 xen/arch/x86/domctl.c
--- a/xen/arch/x86/domctl.c     Thu Sep 25 13:33:50 2008 +0100
+++ b/xen/arch/x86/domctl.c     Wed Mar 04 17:04:27 2009 +0900
@@ -20,6 +20,7 @@
 #include <xen/trace.h>
 #include <xen/console.h>
 #include <xen/iocap.h>
+#include <xen/kemari.h>
 #include <xen/paging.h>
 #include <asm/irq.h>
 #include <asm/hvm/hvm.h>
@@ -997,6 +998,21 @@
     }
     break;

+    case XEN_DOMCTL_kemari_op:
+    {
+        struct domain *d = rcu_lock_domain_by_id(domctl->domain);
+
+        ret = -ESRCH;
+        if ( unlikely(d == NULL) )
+            break;
+
+        ret = do_kemari_op(d, &domctl->u.kemari_op);
+
+        copy_to_guest(u_domctl, domctl, 1);
+        rcu_unlock_domain(d);
+    }
+    break;
+
     default:
         ret = -ENOSYS;
         break;
diff -r 19201eebab16 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h       Thu Sep 25 13:33:50 2008 +0100
+++ b/xen/include/public/domctl.h       Wed Mar 04 17:04:29 2009 +0900
@@ -614,6 +614,38 @@
 #define XEN_DOMCTL_set_machine_address_size  51
 #define XEN_DOMCTL_get_machine_address_size  52

+/* Kemari interface */
+#define XEN_DOMCTL_kemari_op         53
+
+#define _XEN_KEMARI_OP_enable 0
+#define XEN_KEMARI_OP_enable  (1UL<<_XEN_KEMARI_OP_enable)
+#define _XEN_KEMARI_OP_off    1
+#define XEN_KEMARI_OP_off     (1UL<<_XEN_KEMARI_OP_off)
+#define _XEN_KEMARI_OP_attach 2
+#define XEN_KEMARI_OP_attach  (1UL<<_XEN_KEMARI_OP_attach)
+#define _XEN_KEMARI_OP_detach 3
+#define XEN_KEMARI_OP_detach  (1UL<<_XEN_KEMARI_OP_detach)
+
+/* Argument of XEN_DOMCTL_kemari_op; the union member used depends on cmd. */
+struct xen_domctl_kemari_op {
+    uint32_t cmd;   /* one of the XEN_KEMARI_OP_* values above */
+
+    union {
+        /* Filled in by the hypervisor on a successful enable. */
+        struct {
+            uint32_t port;       /* Kemari notification event channel port */
+            uint32_t num_pages;  /* number of pages backing the shared ring */
+            uint64_t mfn;        /* machine frame number of the first page */
+        } enable; /* XEN_KEMARI_OP_enable */
+        struct {
+            uint32_t port;             /* event channel port to tap */
+            uint16_t evtchn_tap_mode;  /* KEMARI_TAP_* mode for the tap */
+        } attach; /* XEN_KEMARI_OP_attach */
+        struct {
+            uint32_t port;             /* event channel port to untap */
+        } detach; /* XEN_KEMARI_OP_detach */
+    } u;
+};
+typedef struct xen_domctl_kemari_op xen_domctl_kemari_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_kemari_op_t);

 struct xen_domctl {
     uint32_t cmd;
@@ -654,6 +686,7 @@
         struct xen_domctl_set_opt_feature   set_opt_feature;
         struct xen_domctl_set_target        set_target;
         struct xen_domctl_subscribe         subscribe;
+        struct xen_domctl_kemari_op         kemari_op;
 #if defined(__i386__) || defined(__x86_64__)
         struct xen_domctl_cpuid             cpuid;
 #endif
diff -r 19201eebab16 xen/include/public/io/xenbus.h
--- a/xen/include/public/io/xenbus.h    Thu Sep 25 13:33:50 2008 +0100
+++ b/xen/include/public/io/xenbus.h    Wed Mar 04 17:04:29 2009 +0900
@@ -63,7 +63,9 @@
      */
     XenbusStateReconfiguring = 7,

-    XenbusStateReconfigured  = 8
+    XenbusStateReconfigured  = 8,
+
+    XenbusStateAttached      = 9
 };
 typedef enum xenbus_state XenbusState;



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.