WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-changelog

[Xen-changelog] Merge.

# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID 23979fb12c4908a5743b833da8d87e73677c5461
# Parent  6a6c4a422780f0aeb357f2fd8286a36afd3876b8
# Parent  fbdbe4fc218de40d5176e0104908e05fb6e2c6ce
Merge.

diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile        Wed Aug 17 20:33:56 2005
@@ -44,7 +44,7 @@
 c-obj-$(CONFIG_EFI)            += efi.o efi_stub.o
 c-obj-$(CONFIG_EARLY_PRINTK)   += early_printk.o
 c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
-c-obj-$(CONFIG_SWIOTLB)                += swiotlb.o
+obj-$(CONFIG_SWIOTLB)          += swiotlb.o
 
 EXTRA_AFLAGS   := -traditional
 
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c    Wed Aug 17 20:33:56 2005
@@ -115,9 +115,6 @@
 EXPORT_SYMBOL(__copy_to_user_ll);
 EXPORT_SYMBOL(strnlen_user);
 
-EXPORT_SYMBOL(dma_alloc_coherent);
-EXPORT_SYMBOL(dma_free_coherent);
-
 #ifdef CONFIG_PCI
 EXPORT_SYMBOL(pci_mem_start);
 #endif
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c       Wed Aug 17 20:33:56 2005
@@ -24,13 +24,14 @@
        unsigned long   *bitmap;
 };
 
-static void iommu_bug(void)
-{
-       printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n");
-       BUG();
-}
-
-#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0)
+#define IOMMU_BUG_ON(test)                             \
+do {                                                   \
+       if (unlikely(test)) {                           \
+               printk(KERN_ALERT "Fatal DMA error! "   \
+                      "Please use 'swiotlb=force'\n"); \
+               BUG();                                  \
+       }                                               \
+} while (0)
 
 int
 dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c        Wed Aug 17 20:33:56 2005
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm-xen/hypervisor.h>
 #include <asm-xen/balloon.h>
+#include <linux/module.h>
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 #include <linux/percpu.h>
 #include <asm/tlbflush.h>
@@ -352,7 +353,6 @@
     balloon_unlock(flags);
 }
 
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
 
 unsigned long allocate_empty_lowmem_region(unsigned long pages)
 {
@@ -401,4 +401,4 @@
     return vstart;
 }
 
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+EXPORT_SYMBOL(allocate_empty_lowmem_region);
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c     Wed Aug 17 20:33:56 2005
@@ -256,19 +256,23 @@
     char *str;
 
     str = (char *)xenbus_read("control", "shutdown", NULL);
-    /* Ignore read errors and recursive shutdown events. */
-    if (IS_ERR(str) || !strcmp(str, __stringify(SHUTDOWN_INVALID)))
+    /* Ignore read errors. */
+    if (IS_ERR(str))
         return;
-
-    xenbus_printf("control", "shutdown", "%i", SHUTDOWN_INVALID);
-
-    if (strcmp(str, "poweroff") == 0) {
+    if (strlen(str) == 0) {
+        kfree(str);
+        return;
+    }
+
+    xenbus_write("control", "shutdown", "", O_CREAT);
+
+    if (strcmp(str, "poweroff") == 0)
         shutting_down = SHUTDOWN_POWEROFF;
-    } else if (strcmp(str, "reboot") == 0) {
+    else if (strcmp(str, "reboot") == 0)
         shutting_down = SHUTDOWN_REBOOT;
-    } else if (strcmp(str, "suspend") == 0) {
+    else if (strcmp(str, "suspend") == 0)
         shutting_down = SHUTDOWN_SUSPEND;
-    } else {
+    else {
         printk("Ignoring shutdown request: %s\n", str);
         shutting_down = SHUTDOWN_INVALID;
     }
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c     Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c     Wed Aug 17 20:33:56 2005
@@ -5,8 +5,6 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/etherdevice.h>
@@ -14,34 +12,86 @@
 #include <linux/init.h>
 #include <asm/io.h>
 #include <asm/page.h>
-
-EXPORT_SYMBOL(__dev_alloc_skb);
+#include <asm-xen/hypervisor.h>
 
 /* Referenced in netback.c. */
 /*static*/ kmem_cache_t *skbuff_cachep;
 
-/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). */
-#define XEN_SKB_SIZE \
-    ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1))
+#define MAX_SKBUFF_ORDER 2
+static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
 
 struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
 {
-    struct sk_buff *skb;
-    skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
-    if ( likely(skb != NULL) )
-        skb_reserve(skb, 16);
-    return skb;
+       struct sk_buff *skb;
+       int order;
+
+       length = SKB_DATA_ALIGN(length + 16);
+       order = get_order(length + sizeof(struct skb_shared_info));
+       if (order > MAX_SKBUFF_ORDER) {
+               printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
+                      "Increase MAX_SKBUFF_ORDER.\n", order);
+               return NULL;
+       }
+
+       skb = alloc_skb_from_cache(
+               skbuff_order_cachep[order], length, gfp_mask);
+       if (skb != NULL)
+               skb_reserve(skb, 16);
+
+       return skb;
 }
 
 static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
 {
-    scrub_pages(buf, 1);
+       int order = 0;
+
+       while (skbuff_order_cachep[order] != cachep)
+               order++;
+
+       if (order != 0)
+               xen_create_contiguous_region((unsigned long)buf, order);
+
+       scrub_pages(buf, 1 << order);
+}
+
+static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
+{
+       int order = 0;
+
+       while (skbuff_order_cachep[order] != cachep)
+               order++;
+
+       if (order != 0)
+               xen_destroy_contiguous_region((unsigned long)buf, order);
 }
 
 static int __init skbuff_init(void)
 {
-    skbuff_cachep = kmem_cache_create(
-        "xen-skb", PAGE_SIZE, PAGE_SIZE, 0, skbuff_ctor, NULL);
-    return 0;
+       static char name[MAX_SKBUFF_ORDER + 1][20];
+       unsigned long size;
+       int order;
+
+       for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
+               size = PAGE_SIZE << order;
+               sprintf(name[order], "xen-skb-%lu", size);
+               skbuff_order_cachep[order] = kmem_cache_create(
+                       name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+       }
+
+       skbuff_cachep = skbuff_order_cachep[0];
+
+       return 0;
 }
 __initcall(skbuff_init);
+
+EXPORT_SYMBOL(__dev_alloc_skb);
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c   Wed Aug 17 20:33:56 2005
@@ -250,7 +250,11 @@
           happen within a race in page table update. In the later
           case just flush. */
 
-       pgd = pgd_offset(current->mm ?: &init_mm, address);
+       /* On Xen the line below does not always work. Needs investigating! */
+       /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
+       pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+       pgd += pgd_index(address);
+
        pgd_ref = pgd_offset_k(address);
        if (pgd_none(*pgd_ref))
                return -1;
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h    Wed Aug 17 20:33:56 2005
@@ -1,6 +1,33 @@
-/* Private include for xenbus communications. */
+/*
+ * Private include for xenbus communications.
+ * 
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
 #ifndef _XENBUS_COMMS_H
 #define _XENBUS_COMMS_H
+
 int xs_init(void);
 int xb_init_comms(void);
 void xb_suspend_comms(void);
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c    Wed Aug 17 20:33:56 2005
@@ -309,6 +309,7 @@
 void xenbus_resume(void)
 {
        xb_init_comms();
+       reregister_xenbus_watches();
        up(&xenbus_lock);
 }
 
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c       Wed Aug 17 20:33:56 2005
@@ -496,6 +496,18 @@
                       watch->node, err);
 }
 
+/* Re-register callbacks to all watches. */
+void reregister_xenbus_watches(void)
+{
+       struct xenbus_watch *watch;
+       char token[sizeof(watch) * 2 + 1];
+
+       list_for_each_entry(watch, &watches, list) {
+               sprintf(token, "%lX", (long)watch);
+               xs_watch(watch->node, token);
+       }
+}
+
 static int watch_thread(void *unused)
 {
        for (;;) {
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Wed Aug 17 20:33:56 2005
@@ -137,10 +137,8 @@
 void xen_create_contiguous_region(unsigned long vstart, unsigned int order);
 void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
 
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
 /* Allocate a contiguous empty region of low memory. Return virtual start. */
 unsigned long allocate_empty_lowmem_region(unsigned long pages);
-#endif
 
 #include <asm/hypercall.h>
 
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h     Wed Aug 17 20:33:56 2005
@@ -1,5 +1,3 @@
-#ifndef _ASM_XEN_XENBUS_H
-#define _ASM_XEN_XENBUS_H
 /******************************************************************************
  * xenbus.h
  *
@@ -28,6 +26,10 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
+
+#ifndef _ASM_XEN_XENBUS_H
+#define _ASM_XEN_XENBUS_H
+
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <asm/semaphore.h>
@@ -119,6 +121,7 @@
 
 int register_xenbus_watch(struct xenbus_watch *watch);
 void unregister_xenbus_watch(struct xenbus_watch *watch);
+void reregister_xenbus_watches(void);
 
 /* Called from xen core code. */
 void xenbus_suspend(void);
diff -r 6a6c4a422780 -r 23979fb12c49 tools/examples/network-bridge
--- a/tools/examples/network-bridge     Tue Aug 16 22:27:16 2005
+++ b/tools/examples/network-bridge     Wed Aug 17 20:33:56 2005
@@ -189,7 +189,7 @@
        fi
        ip link set ${netdev} name p${netdev}
        ip link set veth0 name ${netdev}
-       ifconfig p${netdev} -arp down
+       ifconfig p${netdev} 0.0.0.0 -arp down
        ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff
        ifconfig ${netdev} hw ether ${mac}
        add_to_bridge ${bridge} vif0.0
diff -r 6a6c4a422780 -r 23979fb12c49 tools/misc/xend
--- a/tools/misc/xend   Tue Aug 16 22:27:16 2005
+++ b/tools/misc/xend   Wed Aug 17 20:33:56 2005
@@ -117,11 +117,15 @@
        return    
 
 def start_xenstored():
-    s,o = commands.getstatusoutput("/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid");
+    XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
+    cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"
+    if XENSTORED_TRACE:
+        cmd += " -T /var/log/xenstored-trace.log"
+    s,o = commands.getstatusoutput(cmd)
 
 def start_consoled():
     if os.fork() == 0:
-        os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled']);
+        os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled'])
             
 def main():
     try:
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py       Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xend/XendDomain.py       Wed Aug 17 20:33:56 2005
@@ -320,8 +320,7 @@
         @param vmconfig: vm configuration
         """
         config = sxp.child_value(vmconfig, 'config')
-        uuid = sxp.child_value(vmconfig, 'uuid')
-        dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
+        dominfo = XendDomainInfo.restore(self.dbmap, config)
         return dominfo
 
     def domain_restore(self, src, progress=False):
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py   Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py   Wed Aug 17 20:33:56 2005
@@ -195,19 +195,22 @@
 
     recreate = classmethod(recreate)
 
-    def restore(cls, parentdb, config, uuid):
+    def restore(cls, parentdb, config, uuid=None):
         """Create a domain and a VM object to do a restore.
 
         @param parentdb:  parent db
         @param config:    domain configuration
         @param uuid:      uuid to use
         """
+        if not uuid:
+            uuid = getUuid()
         db = parentdb.addChild(uuid)
         vm = cls(db)
         ssidref = int(sxp.child_value(config, 'ssidref'))
         log.debug('restoring with ssidref='+str(ssidref))
         id = xc.domain_create(ssidref = ssidref)
         vm.setdom(id)
+        vm.clear_shutdown()
         try:
             vm.restore = True
             vm.construct(config)
@@ -979,6 +982,11 @@
         if not reason in ['suspend']:
             self.shutdown_pending = {'start':time.time(), 'reason':reason}
 
+    def clear_shutdown(self):
+        db = self.db.addChild("/control")
+        db['shutdown'] = ""
+        db.saveDB(save=True)
+
     def send_sysrq(self, key=0):
         db = self.db.addChild("/control");
         db['sysrq'] = '%c' % key;
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py     Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xm/create.py     Wed Aug 17 20:33:56 2005
@@ -380,7 +380,6 @@
 
     @return: MAC address string
     """
-    random.seed()
     mac = [ 0xaa, 0x00, 0x00,
             random.randint(0x00, 0x7f),
             random.randint(0x00, 0xff),
@@ -689,6 +688,7 @@
     del xc
 
 def main(argv):
+    random.seed()
     opts = gopts
     args = opts.parse(argv)
     if opts.vals.help:
diff -r 6a6c4a422780 -r 23979fb12c49 tools/xenstore/xenstored.h
--- a/tools/xenstore/xenstored.h        Tue Aug 16 22:27:16 2005
+++ b/tools/xenstore/xenstored.h        Wed Aug 17 20:33:56 2005
@@ -1,21 +1,29 @@
-/* 
-    Simple prototyle Xen Store Daemon providing simple tree-like database.
-    Copyright (C) 2005 Rusty Russell IBM Corporation
+/*
+ * Simple prototyle Xen Store Daemon providing simple tree-like database.
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
 
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 2 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-*/
 #ifndef _XENSTORED_H
 #define _XENSTORED_H
 
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c   Tue Aug 16 22:27:16 2005
+++ b/xen/arch/ia64/xenmisc.c   Wed Aug 17 20:33:56 2005
@@ -280,7 +280,6 @@
 
 unsigned long context_switch_count = 0;
 
-// context_switch
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
 //printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@
 //if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
 //printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
 #ifdef CONFIG_VTI
-       unsigned long psr;
-       /* Interrupt is enabled after next task is chosen.
-        * So we have to disable it for stack switch.
-        */
-       local_irq_save(psr);
        vtm_domain_out(prev);
-       /* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
        context_switch_count++;
        switch_to(prev,next,prev);
 #ifdef CONFIG_VTI
-       /* Post-setup for new domain */
         vtm_domain_in(current);
-       local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
 // leave this debug for now: it acts as a heartbeat when more than
 // one domain is active
 {
@@ -315,25 +306,27 @@
 if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
 if (!i--) { printk("+",id); i = 1000000; }
 }
-       clear_bit(_VCPUF_running, &prev->vcpu_flags);
-       //if (!is_idle_task(next->domain) )
-               //send_guest_virq(next, VIRQ_TIMER);
+
 #ifdef CONFIG_VTI
        if (VMX_DOMAIN(current))
                vmx_load_all_rr(current);
-       return;
-#else // CONFIG_VTI
+#else
        if (!is_idle_task(current->domain)) {
                load_region_regs(current);
                if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
        }
        if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+       /* nothing to do */
 }
 
 void continue_running(struct vcpu *same)
 {
-    /* nothing to do */
+       /* nothing to do */
 }
 
 void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c     Tue Aug 16 22:27:16 2005
+++ b/xen/arch/x86/domain.c     Wed Aug 17 20:33:56 2005
@@ -48,6 +48,8 @@
 
 struct percpu_ctxt {
     struct vcpu *curr_vcpu;
+    unsigned int context_not_finalised;
+    unsigned int dirty_segment_mask;
 } __cacheline_aligned;
 static struct percpu_ctxt percpu_ctxt[NR_CPUS];
 
@@ -541,51 +543,59 @@
     __r; })
 
 #if CONFIG_VMX
-#define load_msrs(_p, _n)     if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n)     if (vmx_switch_on) vmx_load_msrs(n)
 #else
-#define load_msrs(_p, _n)     ((void)0)
+#define load_msrs(n)     ((void)0)
 #endif 
 
-static void load_segments(struct vcpu *p, struct vcpu *n)
-{
-    struct vcpu_guest_context *pctxt = &p->arch.guest_context;
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS           0x01
+#define DIRTY_ES           0x02
+#define DIRTY_FS           0x04
+#define DIRTY_GS           0x08
+#define DIRTY_FS_BASE      0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
+{
     struct vcpu_guest_context *nctxt = &n->arch.guest_context;
     int all_segs_okay = 1;
+    unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+    /* Load and clear the dirty segment mask. */
+    dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+    percpu_ctxt[cpu].dirty_segment_mask = 0;
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
         all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
 
     /* Either selector != 0 ==> reload. */
-    if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
         all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
 
     /*
      * Either selector != 0 ==> reload.
      * Also reload to reset FS_BASE if it was non-zero.
      */
-    if ( unlikely(pctxt->user_regs.fs |
-                  pctxt->fs_base |
+    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
                   nctxt->user_regs.fs) )
-    {
         all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
-        if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
-            pctxt->fs_base = 0;
-    }
 
     /*
      * Either selector != 0 ==> reload.
      * Also reload to reset GS_BASE if it was non-zero.
      */
-    if ( unlikely(pctxt->user_regs.gs |
-                  pctxt->gs_base_user |
+    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
                   nctxt->user_regs.gs) )
     {
         /* Reset GS_BASE with user %gs? */
-        if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
             all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
-        if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
-            pctxt->gs_base_user = 0;
     }
 
     /* This can only be non-zero if selector is NULL. */
@@ -650,7 +660,9 @@
 
 static void save_segments(struct vcpu *v)
 {
-    struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+    struct cpu_user_regs      *regs = &ctxt->user_regs;
+    unsigned int dirty_segment_mask = 0;
 
     if ( VMX_DOMAIN(v) )
         rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +671,34 @@
     __asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
     __asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
     __asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
-
-static void clear_segments(void)
-{
-    __asm__ __volatile__ (
-        " movl %0,%%ds; "
-        " movl %0,%%es; "
-        " movl %0,%%fs; "
-        " movl %0,%%gs; "
-        ""safe_swapgs"  "
-        " movl %0,%%gs"
-        : : "r" (0) );
+
+    if ( regs->ds )
+        dirty_segment_mask |= DIRTY_DS;
+
+    if ( regs->es )
+        dirty_segment_mask |= DIRTY_ES;
+
+    if ( regs->fs )
+    {
+        dirty_segment_mask |= DIRTY_FS;
+        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+    }
+    else if ( ctxt->fs_base )
+    {
+        dirty_segment_mask |= DIRTY_FS_BASE;
+    }
+
+    if ( regs->gs )
+    {
+        dirty_segment_mask |= DIRTY_GS;
+        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+    }
+    else if ( ctxt->gs_base_user )
+    {
+        dirty_segment_mask |= DIRTY_GS_BASE_USER;
+    }
+
+    percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
 }
 
 long do_switch_to_user(void)
@@ -706,10 +734,9 @@
 
 #elif defined(__i386__)
 
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n)     ((void)0)
-#define save_segments(_p)     ((void)0)
-#define clear_segments()      ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n)     ((void)0)
+#define save_segments(p) ((void)0)
 
 static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
 {
@@ -726,9 +753,9 @@
 static void __context_switch(void)
 {
     struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
-    unsigned int         cpu = smp_processor_id();
-    struct vcpu  *p = percpu_ctxt[cpu].curr_vcpu;
-    struct vcpu  *n = current;
+    unsigned int          cpu = smp_processor_id();
+    struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
+    struct vcpu          *n = current;
 
     if ( !is_idle_task(p->domain) )
     {
@@ -786,23 +813,31 @@
 
 void context_switch(struct vcpu *prev, struct vcpu *next)
 {
-    struct vcpu *realprev;
-
-    local_irq_disable();
+    unsigned int cpu = smp_processor_id();
+
+    ASSERT(!local_irq_is_enabled());
 
     set_current(next);
 
-    if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) || 
-         is_idle_task(next->domain) )
-    {
-        local_irq_enable();
-    }
-    else
+    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
     {
         __context_switch();
-
-        local_irq_enable();
-        
+        percpu_ctxt[cpu].context_not_finalised = 1;
+    }
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+    unsigned int cpu = smp_processor_id();
+
+    ASSERT(local_irq_is_enabled());
+
+    if ( percpu_ctxt[cpu].context_not_finalised )
+    {
+        percpu_ctxt[cpu].context_not_finalised = 0;
+
+        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+
         if ( VMX_DOMAIN(next) )
         {
             vmx_restore_msrs(next);
@@ -810,18 +845,10 @@
         else
         {
             load_LDT(next);
-            load_segments(realprev, next);
-            load_msrs(realprev, next);
-        }
-    }
-
-    /*
-     * We do this late on because it doesn't need to be protected by the
-     * schedule_lock, and because we want this to be the very last use of
-     * 'prev' (after this point, a dying domain's info structure may be freed
-     * without warning). 
-     */
-    clear_bit(_VCPUF_running, &prev->vcpu_flags);
+            load_segments(next);
+            load_msrs(next);
+        }
+    }
 
     schedule_tail(next);
     BUG();
@@ -835,12 +862,19 @@
 
 int __sync_lazy_execstate(void)
 {
-    if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
-        return 0;
-    __context_switch();
-    load_LDT(current);
-    clear_segments();
-    return 1;
+    unsigned long flags;
+    int switch_required;
+
+    local_irq_save(flags);
+
+    switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+    if ( switch_required )
+        __context_switch();
+
+    local_irq_restore(flags);
+
+    return switch_required;
 }
 
 void sync_lazy_execstate_cpu(unsigned int cpu)
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Tue Aug 16 22:27:16 2005
+++ b/xen/arch/x86/vmx.c        Wed Aug 17 20:33:56 2005
@@ -65,7 +65,7 @@
  * are not modified once set for generic domains, we don't save them, 
  * but simply reset them to the values set at percpu_traps_init().
  */
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
 {
     struct msr_state *host_state;
     host_state = &percpu_msr[smp_processor_id()];
diff -r 6a6c4a422780 -r 23979fb12c49 xen/common/schedule.c
--- a/xen/common/schedule.c     Tue Aug 16 22:27:16 2005
+++ b/xen/common/schedule.c     Wed Aug 17 20:33:56 2005
@@ -474,13 +474,14 @@
 
     set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
 
-    /* Must be protected by the schedule_lock! */
+    if ( unlikely(prev == next) )
+    {
+        spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+        return continue_running(prev);
+    }
+
+    clear_bit(_VCPUF_running, &prev->vcpu_flags);
     set_bit(_VCPUF_running, &next->vcpu_flags);
-
-    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
-    if ( unlikely(prev == next) )
-        return continue_running(prev);
 
     perfc_incrc(sched_ctx);
 
@@ -517,6 +518,10 @@
              next->domain->domain_id, next->vcpu_id);
 
     context_switch(prev, next);
+
+    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+    context_switch_finalise(next);
 }
 
 /* No locking needed -- pointer comparison is safe :-) */
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/e820.h
--- a/xen/include/asm-x86/e820.h        Tue Aug 16 22:27:16 2005
+++ b/xen/include/asm-x86/e820.h        Wed Aug 17 20:33:56 2005
@@ -3,7 +3,7 @@
 
 #include <asm/page.h>
 
-#define E820MAX        32
+#define E820MAX        128
 
 #define E820_RAM          1
 #define E820_RESERVED     2
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h    Tue Aug 16 22:27:16 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h    Wed Aug 17 20:33:56 2005
@@ -28,10 +28,10 @@
 extern void stop_vmx(void);
 
 #if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
 void vmx_restore_msrs(struct vcpu *d);
 #else
-#define vmx_load_msrs(_p, _n)      ((void)0)
+#define vmx_load_msrs(_n)          ((void)0)
 #define vmx_restore_msrs(_v)       ((void)0)
 #endif
 
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h   Tue Aug 16 22:27:16 2005
+++ b/xen/include/xen/sched.h   Wed Aug 17 20:33:56 2005
@@ -258,12 +258,32 @@
 extern void sync_lazy_execstate_all(void);
 extern int __sync_lazy_execstate(void);
 
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ * 
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
 extern void context_switch(
     struct vcpu *prev, 
     struct vcpu *next);
 
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+    struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
 extern void continue_running(
     struct vcpu *same);
 

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog

<Prev in Thread] Current Thread [Next in Thread>