# HG changeset patch
# User adsharma@xxxxxxxxxxxxxxxxxxxx
# Node ID 23979fb12c4908a5743b833da8d87e73677c5461
# Parent 6a6c4a422780f0aeb357f2fd8286a36afd3876b8
# Parent fbdbe4fc218de40d5176e0104908e05fb6e2c6ce
Merge.
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile Wed Aug 17 20:33:56 2005
@@ -44,7 +44,7 @@
c-obj-$(CONFIG_EFI) += efi.o efi_stub.o
c-obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
c-obj-$(CONFIG_SMP_ALTERNATIVES)+= smpalts.o
-c-obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
EXTRA_AFLAGS := -traditional
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c Wed Aug 17 20:33:56 2005
@@ -115,9 +115,6 @@
EXPORT_SYMBOL(__copy_to_user_ll);
EXPORT_SYMBOL(strnlen_user);
-EXPORT_SYMBOL(dma_alloc_coherent);
-EXPORT_SYMBOL(dma_free_coherent);
-
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c Wed Aug 17 20:33:56 2005
@@ -24,13 +24,14 @@
unsigned long *bitmap;
};
-static void iommu_bug(void)
-{
- printk(KERN_ALERT "Fatal DMA error! Please use 'swiotlb=force'\n");
- BUG();
-}
-
-#define IOMMU_BUG_ON(test) do { if (unlikely(test)) iommu_bug(); } while(0)
+#define IOMMU_BUG_ON(test) \
+do { \
+ if (unlikely(test)) { \
+ printk(KERN_ALERT "Fatal DMA error! " \
+ "Please use 'swiotlb=force'\n"); \
+ BUG(); \
+ } \
+} while (0)
int
dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
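
[The open-coded macro keeps its call sites unchanged. A minimal sketch of a
typical one, for context only (hypothetical code, not part of this patch;
range_straddles_page_boundary() is an assumed helper):

    /* Hypothetical call site for IOMMU_BUG_ON(); illustrative only. */
    dma_addr_t dma_map_single_sketch(void *ptr, size_t size)
    {
            dma_addr_t dma = virt_to_bus(ptr);

            /* Crash loudly rather than hand the device a bad address. */
            IOMMU_BUG_ON(range_straddles_page_boundary(ptr, size));
            return dma;
    }
]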
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/hypervisor.c Wed Aug 17 20:33:56 2005
@@ -35,6 +35,7 @@
#include <asm/pgtable.h>
#include <asm-xen/hypervisor.h>
#include <asm-xen/balloon.h>
+#include <linux/module.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#include <linux/percpu.h>
#include <asm/tlbflush.h>
@@ -352,7 +353,6 @@
balloon_unlock(flags);
}
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
unsigned long allocate_empty_lowmem_region(unsigned long pages)
{
@@ -401,4 +401,4 @@
return vstart;
}
-#endif /* CONFIG_XEN_PHYSDEV_ACCESS */
+EXPORT_SYMBOL(allocate_empty_lowmem_region);
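
[With the CONFIG_XEN_PHYSDEV_ACCESS guard dropped and the symbol exported,
modular code can call the allocator directly. A hedged sketch, assuming from
the surrounding code that 0 is returned on failure:

    /* Hypothetical module snippet: reserve 16 empty lowmem pages. */
    unsigned long vstart = allocate_empty_lowmem_region(16);
    if (vstart == 0)
            printk(KERN_ERR "lowmem region reservation failed\n");
]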
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/reboot.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/reboot.c Wed Aug 17 20:33:56 2005
@@ -256,19 +256,23 @@
char *str;
str = (char *)xenbus_read("control", "shutdown", NULL);
- /* Ignore read errors and recursive shutdown events. */
- if (IS_ERR(str) || !strcmp(str, __stringify(SHUTDOWN_INVALID)))
+ /* Ignore read errors. */
+ if (IS_ERR(str))
return;
-
- xenbus_printf("control", "shutdown", "%i", SHUTDOWN_INVALID);
-
- if (strcmp(str, "poweroff") == 0) {
+ if (strlen(str) == 0) {
+ kfree(str);
+ return;
+ }
+
+ xenbus_write("control", "shutdown", "", O_CREAT);
+
+ if (strcmp(str, "poweroff") == 0)
shutting_down = SHUTDOWN_POWEROFF;
- } else if (strcmp(str, "reboot") == 0) {
+ else if (strcmp(str, "reboot") == 0)
shutting_down = SHUTDOWN_REBOOT;
- } else if (strcmp(str, "suspend") == 0) {
+ else if (strcmp(str, "suspend") == 0)
shutting_down = SHUTDOWN_SUSPEND;
- } else {
+ else {
printk("Ignoring shutdown request: %s\n", str);
shutting_down = SHUTDOWN_INVALID;
}
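
[The rewritten handler clears the node with an empty write rather than
rewriting SHUTDOWN_INVALID; that write re-fires the watch, and the strlen()
check above absorbs the echo. The idiom in isolation (a sketch, not code from
this patch):

    /* Self-clearing watch handler: treat the empty echo as a no-op. */
    char *str = (char *)xenbus_read("control", "shutdown", NULL);
    if (IS_ERR(str))
            return;                       /* read error */
    if (strlen(str) == 0) {
            kfree(str);
            return;                       /* echo of our own clear */
    }
    xenbus_write("control", "shutdown", "", O_CREAT);  /* re-fires watch */
]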
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c
--- a/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/kernel/skbuff.c Wed Aug 17 20:33:56 2005
@@ -5,8 +5,6 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
@@ -14,34 +12,86 @@
#include <linux/init.h>
#include <asm/io.h>
#include <asm/page.h>
-
-EXPORT_SYMBOL(__dev_alloc_skb);
+#include <asm-xen/hypervisor.h>
/* Referenced in netback.c. */
/*static*/ kmem_cache_t *skbuff_cachep;
-/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). */
-#define XEN_SKB_SIZE \
- ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1))
+#define MAX_SKBUFF_ORDER 2
+static kmem_cache_t *skbuff_order_cachep[MAX_SKBUFF_ORDER + 1];
struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
{
- struct sk_buff *skb;
- skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
- if ( likely(skb != NULL) )
- skb_reserve(skb, 16);
- return skb;
+ struct sk_buff *skb;
+ int order;
+
+ length = SKB_DATA_ALIGN(length + 16);
+ order = get_order(length + sizeof(struct skb_shared_info));
+ if (order > MAX_SKBUFF_ORDER) {
+ printk(KERN_ALERT "Attempt to allocate order %d skbuff. "
+ "Increase MAX_SKBUFF_ORDER.\n", order);
+ return NULL;
+ }
+
+ skb = alloc_skb_from_cache(
+ skbuff_order_cachep[order], length, gfp_mask);
+ if (skb != NULL)
+ skb_reserve(skb, 16);
+
+ return skb;
}
static void skbuff_ctor(void *buf, kmem_cache_t *cachep, unsigned long unused)
{
- scrub_pages(buf, 1);
+ int order = 0;
+
+ while (skbuff_order_cachep[order] != cachep)
+ order++;
+
+ if (order != 0)
+ xen_create_contiguous_region((unsigned long)buf, order);
+
+ scrub_pages(buf, 1 << order);
+}
+
+static void skbuff_dtor(void *buf, kmem_cache_t *cachep, unsigned long unused)
+{
+ int order = 0;
+
+ while (skbuff_order_cachep[order] != cachep)
+ order++;
+
+ if (order != 0)
+ xen_destroy_contiguous_region((unsigned long)buf, order);
}
static int __init skbuff_init(void)
{
- skbuff_cachep = kmem_cache_create(
- "xen-skb", PAGE_SIZE, PAGE_SIZE, 0, skbuff_ctor, NULL);
- return 0;
+ static char name[MAX_SKBUFF_ORDER + 1][20];
+ unsigned long size;
+ int order;
+
+ for (order = 0; order <= MAX_SKBUFF_ORDER; order++) {
+ size = PAGE_SIZE << order;
+ sprintf(name[order], "xen-skb-%lu", size);
+ skbuff_order_cachep[order] = kmem_cache_create(
+ name[order], size, size, 0, skbuff_ctor, skbuff_dtor);
+ }
+
+ skbuff_cachep = skbuff_order_cachep[0];
+
+ return 0;
}
__initcall(skbuff_init);
+
+EXPORT_SYMBOL(__dev_alloc_skb);
+
+/*
+ * Local variables:
+ * c-file-style: "linux"
+ * indent-tabs-mode: t
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * tab-width: 8
+ * End:
+ */
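
[For a feel of the sizing arithmetic above (a worked example under this
patch's constants, not code from the change): a standard Ethernet frame still
comes from the order-0 cache, since the aligned data plus
struct skb_shared_info fits in one page:

    /* Illustrative only: which order cache a 1500-byte frame selects. */
    unsigned int length = SKB_DATA_ALIGN(1500 + 16);
    int order = get_order(length + sizeof(struct skb_shared_info));
    /* order == 0 here, so the "xen-skb-4096" cache serves it; anything
     * over PAGE_SIZE << MAX_SKBUFF_ORDER (16KB) is rejected outright. */
]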
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c Wed Aug 17 20:33:56 2005
@@ -250,7 +250,11 @@
happen within a race in page table update. In the later
case just flush. */
- pgd = pgd_offset(current->mm ?: &init_mm, address);
+ /* On Xen the line below does not always work. Needs investigating! */
+ /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
+ pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
+ pgd += pgd_index(address);
+
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
return -1;
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_comms.h Wed Aug 17 20:33:56 2005
@@ -1,6 +1,33 @@
-/* Private include for xenbus communications. */
+/*
+ * Private include for xenbus communications.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
#ifndef _XENBUS_COMMS_H
#define _XENBUS_COMMS_H
+
int xs_init(void);
int xb_init_comms(void);
void xb_suspend_comms(void);
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_probe.c Wed Aug 17 20:33:56 2005
@@ -309,6 +309,7 @@
void xenbus_resume(void)
{
xb_init_comms();
+ reregister_xenbus_watches();
up(&xenbus_lock);
}
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c
--- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c Wed Aug 17 20:33:56 2005
@@ -496,6 +496,18 @@
watch->node, err);
}
+/* Re-register callbacks to all watches. */
+void reregister_xenbus_watches(void)
+{
+ struct xenbus_watch *watch;
+ char token[sizeof(watch) * 2 + 1];
+
+ list_for_each_entry(watch, &watches, list) {
+ sprintf(token, "%lX", (long)watch);
+ xs_watch(watch->node, token);
+ }
+}
+
static int watch_thread(void *unused)
{
for (;;) {
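
[The token written above is just the watch pointer rendered in hex, the same
encoding register_xenbus_watch() already uses, so the firing path can recover
the watch from the token. A hedged sketch of that inverse mapping
(hypothetical helper, not in this patch):

    static struct xenbus_watch *watch_from_token(const char *token)
    {
            unsigned long addr;

            if (sscanf(token, "%lX", &addr) != 1)
                    return NULL;
            return (struct xenbus_watch *)addr;
    }
]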
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/hypervisor.h
--- a/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/hypervisor.h Wed Aug 17 20:33:56 2005
@@ -137,10 +137,8 @@
void xen_create_contiguous_region(unsigned long vstart, unsigned int order);
void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order);
-#ifdef CONFIG_XEN_PHYSDEV_ACCESS
/* Allocate a contiguous empty region of low memory. Return virtual start. */
unsigned long allocate_empty_lowmem_region(unsigned long pages);
-#endif
#include <asm/hypercall.h>
diff -r 6a6c4a422780 -r 23979fb12c49 linux-2.6-xen-sparse/include/asm-xen/xenbus.h
--- a/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Tue Aug 16 22:27:16 2005
+++ b/linux-2.6-xen-sparse/include/asm-xen/xenbus.h Wed Aug 17 20:33:56 2005
@@ -1,5 +1,3 @@
-#ifndef _ASM_XEN_XENBUS_H
-#define _ASM_XEN_XENBUS_H
/******************************************************************************
* xenbus.h
*
@@ -28,6 +26,10 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
+
+#ifndef _ASM_XEN_XENBUS_H
+#define _ASM_XEN_XENBUS_H
+
#include <linux/device.h>
#include <linux/notifier.h>
#include <asm/semaphore.h>
@@ -119,6 +121,7 @@
int register_xenbus_watch(struct xenbus_watch *watch);
void unregister_xenbus_watch(struct xenbus_watch *watch);
+void reregister_xenbus_watches(void);
/* Called from xen core code. */
void xenbus_suspend(void);
diff -r 6a6c4a422780 -r 23979fb12c49 tools/examples/network-bridge
--- a/tools/examples/network-bridge Tue Aug 16 22:27:16 2005
+++ b/tools/examples/network-bridge Wed Aug 17 20:33:56 2005
@@ -189,7 +189,7 @@
fi
ip link set ${netdev} name p${netdev}
ip link set veth0 name ${netdev}
- ifconfig p${netdev} -arp down
+ ifconfig p${netdev} 0.0.0.0 -arp down
ifconfig p${netdev} hw ether fe:ff:ff:ff:ff:ff
ifconfig ${netdev} hw ether ${mac}
add_to_bridge ${bridge} vif0.0
diff -r 6a6c4a422780 -r 23979fb12c49 tools/misc/xend
--- a/tools/misc/xend Tue Aug 16 22:27:16 2005
+++ b/tools/misc/xend Wed Aug 17 20:33:56 2005
@@ -117,11 +117,15 @@
return
def start_xenstored():
- s,o = commands.getstatusoutput("/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid");
+ XENSTORED_TRACE = os.getenv("XENSTORED_TRACE")
+ cmd = "/usr/sbin/xenstored --pid-file=/var/run/xenstore.pid"
+ if XENSTORED_TRACE:
+ cmd += " -T /var/log/xenstored-trace.log"
+ s,o = commands.getstatusoutput(cmd)
def start_consoled():
if os.fork() == 0:
- os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled']);
+ os.execvp('/usr/sbin/xenconsoled', ['/usr/sbin/xenconsoled'])
def main():
try:
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomain.py
--- a/tools/python/xen/xend/XendDomain.py Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xend/XendDomain.py Wed Aug 17 20:33:56 2005
@@ -320,8 +320,7 @@
@param vmconfig: vm configuration
"""
config = sxp.child_value(vmconfig, 'config')
- uuid = sxp.child_value(vmconfig, 'uuid')
- dominfo = XendDomainInfo.restore(self.dbmap, config, uuid=uuid)
+ dominfo = XendDomainInfo.restore(self.dbmap, config)
return dominfo
def domain_restore(self, src, progress=False):
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xend/XendDomainInfo.py
--- a/tools/python/xen/xend/XendDomainInfo.py Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xend/XendDomainInfo.py Wed Aug 17 20:33:56 2005
@@ -195,19 +195,22 @@
recreate = classmethod(recreate)
- def restore(cls, parentdb, config, uuid):
+ def restore(cls, parentdb, config, uuid=None):
"""Create a domain and a VM object to do a restore.
@param parentdb: parent db
@param config: domain configuration
@param uuid: uuid to use
"""
+ if not uuid:
+ uuid = getUuid()
db = parentdb.addChild(uuid)
vm = cls(db)
ssidref = int(sxp.child_value(config, 'ssidref'))
log.debug('restoring with ssidref='+str(ssidref))
id = xc.domain_create(ssidref = ssidref)
vm.setdom(id)
+ vm.clear_shutdown()
try:
vm.restore = True
vm.construct(config)
@@ -979,6 +982,11 @@
if not reason in ['suspend']:
self.shutdown_pending = {'start':time.time(), 'reason':reason}
+ def clear_shutdown(self):
+ db = self.db.addChild("/control")
+ db['shutdown'] = ""
+ db.saveDB(save=True)
+
def send_sysrq(self, key=0):
db = self.db.addChild("/control");
db['sysrq'] = '%c' % key;
diff -r 6a6c4a422780 -r 23979fb12c49 tools/python/xen/xm/create.py
--- a/tools/python/xen/xm/create.py Tue Aug 16 22:27:16 2005
+++ b/tools/python/xen/xm/create.py Wed Aug 17 20:33:56 2005
@@ -380,7 +380,6 @@
@return: MAC address string
"""
- random.seed()
mac = [ 0xaa, 0x00, 0x00,
random.randint(0x00, 0x7f),
random.randint(0x00, 0xff),
@@ -689,6 +688,7 @@
del xc
def main(argv):
+ random.seed()
opts = gopts
args = opts.parse(argv)
if opts.vals.help:
diff -r 6a6c4a422780 -r 23979fb12c49 tools/xenstore/xenstored.h
--- a/tools/xenstore/xenstored.h Tue Aug 16 22:27:16 2005
+++ b/tools/xenstore/xenstored.h Wed Aug 17 20:33:56 2005
@@ -1,21 +1,29 @@
-/*
- Simple prototyle Xen Store Daemon providing simple tree-like database.
- Copyright (C) 2005 Rusty Russell IBM Corporation
+/*
+ * Simple prototyle Xen Store Daemon providing simple tree-like database.
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
#ifndef _XENSTORED_H
#define _XENSTORED_H
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/ia64/xenmisc.c
--- a/xen/arch/ia64/xenmisc.c Tue Aug 16 22:27:16 2005
+++ b/xen/arch/ia64/xenmisc.c Wed Aug 17 20:33:56 2005
@@ -280,7 +280,6 @@
unsigned long context_switch_count = 0;
-// context_switch
void context_switch(struct vcpu *prev, struct vcpu *next)
{
//printk("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n");
@@ -290,22 +289,14 @@
//if (prev->domain->domain_id == 0 && next->domain->domain_id == 1) cs01foo();
//printk("@@sw %d->%d\n",prev->domain->domain_id,next->domain->domain_id);
#ifdef CONFIG_VTI
- unsigned long psr;
- /* Interrupt is enabled after next task is chosen.
- * So we have to disable it for stack switch.
- */
- local_irq_save(psr);
vtm_domain_out(prev);
- /* Housekeeping for prev domain */
-#endif // CONFIG_VTI
-
+#endif
context_switch_count++;
switch_to(prev,next,prev);
#ifdef CONFIG_VTI
- /* Post-setup for new domain */
vtm_domain_in(current);
- local_irq_restore(psr);
-#endif // CONFIG_VTI
+#endif
+
// leave this debug for now: it acts as a heartbeat when more than
// one domain is active
{
@@ -315,25 +306,27 @@
if (!cnt[id]--) { printk("%x",id); cnt[id] = 500000; }
if (!i--) { printk("+",id); i = 1000000; }
}
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
- //if (!is_idle_task(next->domain) )
- //send_guest_virq(next, VIRQ_TIMER);
+
#ifdef CONFIG_VTI
if (VMX_DOMAIN(current))
vmx_load_all_rr(current);
- return;
-#else // CONFIG_VTI
+#else
if (!is_idle_task(current->domain)) {
load_region_regs(current);
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
}
if (vcpu_timer_expired(current)) vcpu_pend_timer(current);
-#endif // CONFIG_VTI
+#endif
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ /* nothing to do */
}
void continue_running(struct vcpu *same)
{
- /* nothing to do */
+ /* nothing to do */
}
void panic_domain(struct pt_regs *regs, const char *fmt, ...)
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Tue Aug 16 22:27:16 2005
+++ b/xen/arch/x86/domain.c Wed Aug 17 20:33:56 2005
@@ -48,6 +48,8 @@
struct percpu_ctxt {
struct vcpu *curr_vcpu;
+ unsigned int context_not_finalised;
+ unsigned int dirty_segment_mask;
} __cacheline_aligned;
static struct percpu_ctxt percpu_ctxt[NR_CPUS];
@@ -541,51 +543,59 @@
__r; })
#if CONFIG_VMX
-#define load_msrs(_p, _n) if (vmx_switch_on) vmx_load_msrs((_p), (_n))
+#define load_msrs(n) if (vmx_switch_on) vmx_load_msrs(n)
#else
-#define load_msrs(_p, _n) ((void)0)
+#define load_msrs(n) ((void)0)
#endif
-static void load_segments(struct vcpu *p, struct vcpu *n)
-{
- struct vcpu_guest_context *pctxt = &p->arch.guest_context;
+/*
+ * save_segments() writes a mask of segments which are dirty (non-zero),
+ * allowing load_segments() to avoid some expensive segment loads and
+ * MSR writes.
+ */
+#define DIRTY_DS 0x01
+#define DIRTY_ES 0x02
+#define DIRTY_FS 0x04
+#define DIRTY_GS 0x08
+#define DIRTY_FS_BASE 0x10
+#define DIRTY_GS_BASE_USER 0x20
+
+static void load_segments(struct vcpu *n)
+{
struct vcpu_guest_context *nctxt = &n->arch.guest_context;
int all_segs_okay = 1;
+ unsigned int dirty_segment_mask, cpu = smp_processor_id();
+
+ /* Load and clear the dirty segment mask. */
+ dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
+ percpu_ctxt[cpu].dirty_segment_mask = 0;
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.ds | nctxt->user_regs.ds) )
+ if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
/* Either selector != 0 ==> reload. */
- if ( unlikely(pctxt->user_regs.es | nctxt->user_regs.es) )
+ if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
/*
* Either selector != 0 ==> reload.
* Also reload to reset FS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.fs |
- pctxt->fs_base |
+ if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
nctxt->user_regs.fs) )
- {
all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
- if ( pctxt->user_regs.fs ) /* != 0 selector kills fs_base */
- pctxt->fs_base = 0;
- }
/*
* Either selector != 0 ==> reload.
* Also reload to reset GS_BASE if it was non-zero.
*/
- if ( unlikely(pctxt->user_regs.gs |
- pctxt->gs_base_user |
+ if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
nctxt->user_regs.gs) )
{
/* Reset GS_BASE with user %gs? */
- if ( pctxt->user_regs.gs || !nctxt->gs_base_user )
+ if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
- if ( pctxt->user_regs.gs ) /* != 0 selector kills gs_base_user */
- pctxt->gs_base_user = 0;
}
/* This can only be non-zero if selector is NULL. */
@@ -650,7 +660,9 @@
static void save_segments(struct vcpu *v)
{
- struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
+ struct vcpu_guest_context *ctxt = &v->arch.guest_context;
+ struct cpu_user_regs *regs = &ctxt->user_regs;
+ unsigned int dirty_segment_mask = 0;
if ( VMX_DOMAIN(v) )
rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
@@ -659,18 +671,34 @@
__asm__ __volatile__ ( "movl %%es,%0" : "=m" (regs->es) );
__asm__ __volatile__ ( "movl %%fs,%0" : "=m" (regs->fs) );
__asm__ __volatile__ ( "movl %%gs,%0" : "=m" (regs->gs) );
-}
-
-static void clear_segments(void)
-{
- __asm__ __volatile__ (
- " movl %0,%%ds; "
- " movl %0,%%es; "
- " movl %0,%%fs; "
- " movl %0,%%gs; "
- ""safe_swapgs" "
- " movl %0,%%gs"
- : : "r" (0) );
+
+ if ( regs->ds )
+ dirty_segment_mask |= DIRTY_DS;
+
+ if ( regs->es )
+ dirty_segment_mask |= DIRTY_ES;
+
+ if ( regs->fs )
+ {
+ dirty_segment_mask |= DIRTY_FS;
+ ctxt->fs_base = 0; /* != 0 selector kills fs_base */
+ }
+ else if ( ctxt->fs_base )
+ {
+ dirty_segment_mask |= DIRTY_FS_BASE;
+ }
+
+ if ( regs->gs )
+ {
+ dirty_segment_mask |= DIRTY_GS;
+ ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
+ }
+ else if ( ctxt->gs_base_user )
+ {
+ dirty_segment_mask |= DIRTY_GS_BASE_USER;
+ }
+
+ percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
}
long do_switch_to_user(void)
@@ -706,10 +734,9 @@
#elif defined(__i386__)
-#define load_segments(_p, _n) ((void)0)
-#define load_msrs(_p, _n) ((void)0)
-#define save_segments(_p) ((void)0)
-#define clear_segments() ((void)0)
+#define load_segments(n) ((void)0)
+#define load_msrs(n) ((void)0)
+#define save_segments(p) ((void)0)
static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
{
@@ -726,9 +753,9 @@
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int cpu = smp_processor_id();
- struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
- struct vcpu *n = current;
+ unsigned int cpu = smp_processor_id();
+ struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
+ struct vcpu *n = current;
if ( !is_idle_task(p->domain) )
{
@@ -786,23 +813,31 @@
void context_switch(struct vcpu *prev, struct vcpu *next)
{
- struct vcpu *realprev;
-
- local_irq_disable();
+ unsigned int cpu = smp_processor_id();
+
+ ASSERT(!local_irq_is_enabled());
set_current(next);
- if ( ((realprev = percpu_ctxt[smp_processor_id()].curr_vcpu) == next) ||
- is_idle_task(next->domain) )
- {
- local_irq_enable();
- }
- else
+ if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
{
__context_switch();
-
- local_irq_enable();
-
+ percpu_ctxt[cpu].context_not_finalised = 1;
+ }
+}
+
+void context_switch_finalise(struct vcpu *next)
+{
+ unsigned int cpu = smp_processor_id();
+
+ ASSERT(local_irq_is_enabled());
+
+ if ( percpu_ctxt[cpu].context_not_finalised )
+ {
+ percpu_ctxt[cpu].context_not_finalised = 0;
+
+ BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
+
if ( VMX_DOMAIN(next) )
{
vmx_restore_msrs(next);
@@ -810,18 +845,10 @@
else
{
load_LDT(next);
- load_segments(realprev, next);
- load_msrs(realprev, next);
- }
- }
-
- /*
- * We do this late on because it doesn't need to be protected by the
- * schedule_lock, and because we want this to be the very last use of
- * 'prev' (after this point, a dying domain's info structure may be freed
- * without warning).
- */
- clear_bit(_VCPUF_running, &prev->vcpu_flags);
+ load_segments(next);
+ load_msrs(next);
+ }
+ }
schedule_tail(next);
BUG();
@@ -835,12 +862,19 @@
int __sync_lazy_execstate(void)
{
- if ( percpu_ctxt[smp_processor_id()].curr_vcpu == current )
- return 0;
- __context_switch();
- load_LDT(current);
- clear_segments();
- return 1;
+ unsigned long flags;
+ int switch_required;
+
+ local_irq_save(flags);
+
+ switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
+
+ if ( switch_required )
+ __context_switch();
+
+ local_irq_restore(flags);
+
+ return switch_required;
}
void sync_lazy_execstate_cpu(unsigned int cpu)
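
[A worked example of the mask (illustrative, not from the patch): suppose
prev ran with %ds = 0x2b, a null %gs selector, but a live gs_base_user.
save_segments() then records

    /* DIRTY_DS | DIRTY_GS_BASE_USER == 0x01 | 0x20 == 0x21 */
    percpu_ctxt[cpu].dirty_segment_mask = DIRTY_DS | DIRTY_GS_BASE_USER;

and load_segments() reloads %ds, touches %gs only to clear the stale user
base, and skips %es and %fs entirely when next's selectors are null. That is
the saving that motivates the mask: null-to-null segments cost nothing on the
switch path.]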
diff -r 6a6c4a422780 -r 23979fb12c49 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Tue Aug 16 22:27:16 2005
+++ b/xen/arch/x86/vmx.c Wed Aug 17 20:33:56 2005
@@ -65,7 +65,7 @@
* are not modified once set for generic domains, we don't save them,
* but simply reset them to the values set at percpu_traps_init().
*/
-void vmx_load_msrs(struct vcpu *p, struct vcpu *n)
+void vmx_load_msrs(struct vcpu *n)
{
struct msr_state *host_state;
host_state = &percpu_msr[smp_processor_id()];
diff -r 6a6c4a422780 -r 23979fb12c49 xen/common/schedule.c
--- a/xen/common/schedule.c Tue Aug 16 22:27:16 2005
+++ b/xen/common/schedule.c Wed Aug 17 20:33:56 2005
@@ -474,13 +474,14 @@
set_ac_timer(&schedule_data[cpu].s_timer, now + r_time);
- /* Must be protected by the schedule_lock! */
+ if ( unlikely(prev == next) )
+ {
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+ return continue_running(prev);
+ }
+
+ clear_bit(_VCPUF_running, &prev->vcpu_flags);
set_bit(_VCPUF_running, &next->vcpu_flags);
-
- spin_unlock_irq(&schedule_data[cpu].schedule_lock);
-
- if ( unlikely(prev == next) )
- return continue_running(prev);
perfc_incrc(sched_ctx);
@@ -517,6 +518,10 @@
next->domain->domain_id, next->vcpu_id);
context_switch(prev, next);
+
+ spin_unlock_irq(&schedule_data[cpu].schedule_lock);
+
+ context_switch_finalise(next);
}
/* No locking needed -- pointer comparison is safe :-) */
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/e820.h
--- a/xen/include/asm-x86/e820.h Tue Aug 16 22:27:16 2005
+++ b/xen/include/asm-x86/e820.h Wed Aug 17 20:33:56 2005
@@ -3,7 +3,7 @@
#include <asm/page.h>
-#define E820MAX 32
+#define E820MAX 128
#define E820_RAM 1
#define E820_RESERVED 2
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/asm-x86/vmx_vmcs.h
--- a/xen/include/asm-x86/vmx_vmcs.h Tue Aug 16 22:27:16 2005
+++ b/xen/include/asm-x86/vmx_vmcs.h Wed Aug 17 20:33:56 2005
@@ -28,10 +28,10 @@
extern void stop_vmx(void);
#if defined (__x86_64__)
-extern void vmx_load_msrs(struct vcpu *p, struct vcpu *n);
+extern void vmx_load_msrs(struct vcpu *n);
void vmx_restore_msrs(struct vcpu *d);
#else
-#define vmx_load_msrs(_p, _n) ((void)0)
+#define vmx_load_msrs(_n) ((void)0)
#define vmx_restore_msrs(_v) ((void)0)
#endif
diff -r 6a6c4a422780 -r 23979fb12c49 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Tue Aug 16 22:27:16 2005
+++ b/xen/include/xen/sched.h Wed Aug 17 20:33:56 2005
@@ -258,12 +258,32 @@
extern void sync_lazy_execstate_all(void);
extern int __sync_lazy_execstate(void);
-/* Called by the scheduler to switch to another vcpu. */
+/*
+ * Called by the scheduler to switch to another VCPU. On entry, although
+ * VCPUF_running is no longer asserted for @prev, its context is still running
+ * on the local CPU and is not committed to memory. The local scheduler lock
+ * is therefore still held, and interrupts are disabled, because the local CPU
+ * is in an inconsistent state.
+ *
+ * The callee must ensure that the local CPU is no longer running in @prev's
+ * context, and that the context is saved to memory, before returning.
+ * Alternatively, if implementing lazy context switching, it suffices to ensure
+ * that invoking __sync_lazy_execstate() will switch and commit @prev's state.
+ */
extern void context_switch(
struct vcpu *prev,
struct vcpu *next);
-/* Called by the scheduler to continue running the current vcpu. */
+/*
+ * On some architectures (notably x86) it is not possible to entirely load
+ * @next's context with interrupts disabled. These may implement a function to
+ * finalise loading the new context after interrupts are re-enabled. This
+ * function is not given @prev and is not permitted to access it.
+ */
+extern void context_switch_finalise(
+ struct vcpu *next);
+
+/* Called by the scheduler to continue running the current VCPU. */
extern void continue_running(
struct vcpu *same);
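
[Taken together with the schedule.c hunk above, the contract reads as the
following calling sequence (a sketch mirroring the code in this patch):

    /* Scheduler side of the two-phase switch. */
    spin_lock_irq(&schedule_data[cpu].schedule_lock);
    /* ... pick next; move VCPUF_running from prev to next ... */
    context_switch(prev, next);           /* IRQs off, lock still held */
    spin_unlock_irq(&schedule_data[cpu].schedule_lock);
    context_switch_finalise(next);        /* IRQs on, lock released    */
]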
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog