--
Juergen Gross Principal Developer Operating Systems
TSP ES&S SWE OS6 Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions e-mail: juergen.gross@xxxxxxxxxxxxxx
Domagkstr. 28 Internet: ts.fujitsu.com
D-80807 Muenchen Company details: ts.fujitsu.com/imprint.html
Signed-off-by: juergen.gross@xxxxxxxxxxxxxx
diff -r fadf63ab49e7 xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/arch/x86/domain_build.c Tue Apr 20 11:10:40 2010 +0200
@@ -9,6 +9,7 @@
#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/event.h>
@@ -84,7 +85,7 @@ struct vcpu *__init alloc_dom0_vcpu0(voi
struct vcpu *__init alloc_dom0_vcpu0(void)
{
if ( opt_dom0_max_vcpus == 0 )
- opt_dom0_max_vcpus = num_online_cpus();
+ opt_dom0_max_vcpus = num_cpupool_cpus(cpupool0);
if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
opt_dom0_max_vcpus = MAX_VIRT_CPUS;
@@ -277,7 +278,7 @@ int __init construct_dom0(
unsigned long _initrd_start, unsigned long initrd_len,
char *cmdline)
{
- int i, rc, compatible, compat32, order, machine;
+ int i, cpu, rc, compatible, compat32, order, machine;
struct cpu_user_regs *regs;
unsigned long pfn, mfn;
unsigned long nr_pages;
@@ -776,8 +777,12 @@ int __init construct_dom0(
printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus);
+ cpu = first_cpu(cpupool0->cpu_valid);
for ( i = 1; i < opt_dom0_max_vcpus; i++ )
- (void)alloc_vcpu(d, i, i % num_online_cpus());
+ {
+ cpu = cycle_cpu(cpu, cpupool0->cpu_valid);
+ (void)alloc_vcpu(d, i, cpu);
+ }
/* Set up CR3 value for write_ptbase */
if ( paging_mode_enabled(d) )
diff -r fadf63ab49e7 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/arch/x86/mm.c Tue Apr 20 11:10:40 2010 +0200
@@ -242,7 +242,7 @@ void __init arch_init_memory(void)
* Any Xen-heap pages that we will allow to be mapped will have
* their domain field set to dom_xen.
*/
- dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0);
+ dom_xen = domain_create(DOMID_XEN, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_xen == NULL);
/*
@@ -250,14 +250,14 @@ void __init arch_init_memory(void)
* This domain owns I/O pages that are within the range of the page_info
* array. Mappings occur at the priv of the caller.
*/
- dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0);
+ dom_io = domain_create(DOMID_IO, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_io == NULL);
/*
* Initialise our DOMID_IO domain.
* This domain owns sharable pages.
*/
- dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0);
+ dom_cow = domain_create(DOMID_COW, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_cow == NULL);
/* First 1MB of RAM is historically marked as I/O. */
diff -r fadf63ab49e7 xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/arch/x86/platform_hypercall.c Tue Apr 20 11:10:40 2010 +0200
@@ -19,6 +19,7 @@
#include <xen/iocap.h>
#include <xen/guest_access.h>
#include <xen/acpi.h>
+#include <xen/sched-if.h>
#include <asm/current.h>
#include <public/platform.h>
#include <acpi/cpufreq/processor_perf.h>
@@ -407,7 +408,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
g_info = &op->u.pcpu_info;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if ( !spin_trylock(&cpu_add_remove_lock) )
+ if ( !spin_trylock(&cpupool_lock) )
{
ret = -EBUSY;
break;
@@ -430,7 +431,7 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
g_info->max_present = last_cpu(cpu_present_map);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
ret = copy_to_guest(u_xenpf_op, op, 1) ? -EFAULT : 0;
}
diff -r fadf63ab49e7 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/arch/x86/setup.c Tue Apr 20 11:10:40 2010 +0200
@@ -2,6 +2,7 @@
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/serial.h>
#include <xen/softirq.h>
@@ -245,7 +246,7 @@ static void __init init_idle_domain(void
/* Domain creation requires that scheduler structures are initialised. */
scheduler_init();
- idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
+ idle_domain = domain_create(IDLE_DOMAIN_ID, CPUPOOLID_NONE, 0, 0);
if ( idle_domain == NULL )
BUG();
idle_domain->vcpu = idle_vcpu;
@@ -1093,8 +1094,13 @@ void __init __start_xen(unsigned long mb
if ( !tboot_protect_mem_regions() )
panic("Could not protect TXT memory regions\n");
+ /* Create initial cpupool 0. */
+ cpupool0 = cpupool_create(0, NULL);
+ if ( (cpupool0 == NULL) || cpupool0_cpu_assign(cpupool0) )
+ panic("Error creating cpupool 0\n");
+
/* Create initial domain 0. */
- dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
+ dom0 = domain_create(0, 0, DOMCRF_s3_integrity, DOM0_SSIDREF);
if ( (dom0 == NULL) || (alloc_dom0_vcpu0() == NULL) )
panic("Error creating domain 0\n");
diff -r fadf63ab49e7 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/arch/x86/smpboot.c Tue Apr 20 11:10:40 2010 +0200
@@ -39,6 +39,7 @@
#include <xen/mm.h>
#include <xen/domain.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/irq.h>
#include <xen/delay.h>
#include <xen/softirq.h>
@@ -105,7 +106,6 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
DEFINE_PER_CPU(int, cpu_state) = { 0 };
void *stack_base[NR_CPUS];
-DEFINE_SPINLOCK(cpu_add_remove_lock);
/*
* The bootstrap kernel entry code has set these up. Save them for
@@ -822,7 +822,7 @@ wakeup_secondary_cpu(int phys_apicid, un
extern cpumask_t cpu_initialized;
/*
- * Caller should hold cpu_add_remove_lock if not called when booting
+ * Caller should hold cpupool_lock if not called when booting
*/
int alloc_cpu_id(void)
{
@@ -986,8 +986,10 @@ static int __devinit do_boot_cpu(int api
cpucount--;
/* Mark the CPU as non-present */
+ spin_lock(&cpupool_lock);
x86_cpu_to_apicid[cpu] = BAD_APICID;
cpu_clear(cpu, cpu_present_map);
+ spin_unlock(&cpupool_lock);
} else {
}
@@ -1307,10 +1309,11 @@ int __cpu_disable(void)
sync_local_execstate();
/* It's now safe to remove this processor from the online map */
+ cpu_clear(cpu, cpupool0->cpu_valid);
cpu_clear(cpu, cpu_online_map);
fixup_irqs();
- cpu_disable_scheduler();
+ cpu_disable_scheduler(cpu, 0);
return 0;
}
@@ -1344,10 +1347,9 @@ int cpu_down(unsigned int cpu)
int err = 0;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
-
- if (num_online_cpus() == 1) {
+ if ((!cpu_isset(cpu, cpupool0->cpu_valid)) || (cpus_weight(cpupool0->cpu_valid) == 1)) {
err = -EBUSY;
goto out;
}
@@ -1381,7 +1383,7 @@ out:
out:
if (!err)
send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return err;
}
@@ -1390,7 +1392,7 @@ int cpu_up(unsigned int cpu)
int err = 0;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
if (cpu_online(cpu)) {
@@ -1408,7 +1410,7 @@ out:
out:
if (!err)
send_guest_global_virq(dom0, VIRQ_PCPU_STATE);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return err;
}
@@ -1494,14 +1496,14 @@ int cpu_add(uint32_t apic_id, uint32_t a
return -EEXIST;
/* spin_trylock() avoids deadlock with stop_machine_run(). */
- if (!spin_trylock(&cpu_add_remove_lock))
+ if (!spin_trylock(&cpupool_lock))
return -EBUSY;
cpu = mp_register_lapic(apic_id, 1);
if (cpu < 0)
{
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return cpu;
}
@@ -1518,7 +1520,7 @@ int cpu_add(uint32_t apic_id, uint32_t a
"Setup node failed for pxm %x\n", pxm);
x86_acpiid_to_apicid[acpi_id] = 0xff;
mp_unregister_lapic(apic_id, cpu);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
return node;
}
apicid_to_node[apic_id] = node;
@@ -1526,7 +1528,7 @@ int cpu_add(uint32_t apic_id, uint32_t a
srat_detect_node(cpu);
numa_add_cpu(cpu);
- spin_unlock(&cpu_add_remove_lock);
+ spin_unlock(&cpupool_lock);
dprintk(XENLOG_INFO, "Add CPU %x with index %x\n", apic_id, cpu);
return cpu;
}
@@ -1570,6 +1572,7 @@ int __devinit __cpu_up(unsigned int cpu)
process_pending_softirqs();
}
+ cpupool_cpu_add(cpu);
cpufreq_add_cpu(cpu);
return 0;
}
diff -r fadf63ab49e7 xen/common/Makefile
--- a/xen/common/Makefile Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/common/Makefile Tue Apr 20 11:10:40 2010 +0200
@@ -1,5 +1,6 @@ obj-y += bitmap.o
obj-y += bitmap.o
obj-y += cpu.o
+obj-y += cpupool.o
obj-y += domctl.o
obj-y += domain.o
obj-y += event_channel.o
diff -r fadf63ab49e7 xen/common/cpupool.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/cpupool.c Tue Apr 20 11:10:40 2010 +0200
@@ -0,0 +1,580 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * Generic cpupool-handling functions.
+ *
+ * Cpupools are a feature to have configurable scheduling domains. Each
+ * cpupool runs an own scheduler on a dedicated set of physical cpus.
+ * A domain is bound to one cpupool at any time, but it can be moved to
+ * another cpupool.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+
+#define for_each_cpupool(ptr) \
+ for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next)) /* ptr: struct cpupool **, points at the link to the current pool */
+
+struct cpupool *cpupool0; /* Initial cpupool with Dom0 */
+cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */
+
+static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
+
+static int cpupool0_max_cpus; /* boot parameter: max. number of cpus for cpupool0, 0 selects all online cpus */
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static int cpupool_moving_cpu = -1; /* cpu currently being unassigned from a pool, -1 if none */
+static struct cpupool *cpupool_cpu_moving = NULL; /* pool cpupool_moving_cpu is being removed from */
+
+/* cpupool lock: be careful, this lock is sometimes released on a different
+ * cpu than the one it was obtained on (see cpupool_unassign_cpu())!
+ */
+DEFINE_SPINLOCK(cpupool_lock);
+
+DEFINE_PER_CPU(struct cpupool *, cpupool); /* pool a cpu is currently assigned to (reset to NULL on unassign) */
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+ return xmalloc(struct cpupool); /* may be NULL; cpupool_create() checks and zeroes the result */
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+ xfree(c); /* releases a struct obtained from alloc_cpupool_struct() */
+}
+
+/*
+ * look up a cpupool by its id. to be called with cpupool lock held
+ * with exact set only a pool having exactly the searched id is accepted,
+ * otherwise the first pool with an id larger or equal to it is returned
+ * returns NULL if not found.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+    struct cpupool **pos;
+
+    for_each_cpupool(pos)
+    {
+        if ( (*pos)->cpupool_id >= id )
+            break;
+    }
+    if ( (*pos != NULL) && ((*pos)->cpupool_id == id) )
+        return *pos;
+    return exact ? NULL : *pos;
+}
+
+/*
+ * create a new cpupool with specified poolid and scheduler and insert it into
+ * cpupool_list; with poolid CPUPOOLID_NONE the id of the last pool + 1 is used
+ * returns pointer to new cpupool structure if okay, NULL else
+ * possible failures:
+ * - no memory, or poolid already used
+ * - unknown scheduler
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+ struct cpupool *c;
+ struct cpupool **q;
+ int last = 0;
+
+ if ( (c = alloc_cpupool_struct()) == NULL )
+ return NULL;
+ memset(c, 0, sizeof(*c));
+
+ printk(XENLOG_DEBUG "cpupool_create(pool=%d,sched=%s)\n", poolid, sched);
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q) /* find the insert position; q ends at the list tail for CPUPOOLID_NONE */
+ {
+ last = (*q)->cpupool_id;
+ if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+ break;
+ }
+ if ( *q != NULL )
+ {
+ if ( (*q)->cpupool_id == poolid ) /* requested id is already in use */
+ {
+ spin_unlock(&cpupool_lock);
+ free_cpupool_struct(c);
+ return NULL;
+ }
+ c->next = *q;
+ }
+ *q = c; /* link the new pool in front of *q (or at the tail) */
+ c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+ if ( schedule_init_global(sched, &(c->sched)) ) /* non-zero result is treated as failure */
+ {
+ spin_unlock(&cpupool_lock);
+ cpupool_destroy(c); /* takes cpupool_lock itself, hence the unlock above; unlinks and frees c */
+ return NULL;
+ }
+ spin_unlock(&cpupool_lock);
+
+ printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+ c->sched.name, c->sched.opt_name);
+
+ return c;
+}
+/*
+ * unlink the given cpupool from the pool list and free it
+ * returns 0 on success, 1 else
+ * possible failures:
+ * - pool still in use (domains are bound to it)
+ * - cpus still assigned to pool
+ * - pool not in list
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+    struct cpupool **link = &cpupool_list;
+    int busy;
+
+    spin_lock(&cpupool_lock);
+    while ( (*link != NULL) && (*link != c) )
+        link = &(*link)->next;
+    busy = (*link != c) || (c->n_dom != 0) || cpus_weight(c->cpu_valid);
+    if ( !busy )
+        *link = c->next;
+    spin_unlock(&cpupool_lock);
+    if ( busy )
+        return 1;
+
+    printk(XENLOG_DEBUG "cpupool_destroy(pool=%d)\n", c->cpupool_id);
+    schedule_deinit_global(&(c->sched));
+    free_cpupool_struct(c);
+    return 0;
+}
+
+/*
+ * assign a specific cpu to a cpupool; cpupool_lock must be held
+ * returns 0 on success, -EBUSY if the cpu is being moved out of another pool
+ */
+static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+ if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) ) /* a cpu in transit may only go back to its old pool */
+ return -EBUSY;
+ per_cpu(cpupool, cpu) = c;
+ schedule_cpu_switch(cpu, c);
+ cpu_clear(cpu, cpupool_free_cpus);
+ if (cpupool_moving_cpu == cpu) /* re-assignment ends a pending unassign of this cpu */
+ {
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ cpu_set(cpu, c->cpu_valid);
+ return 0;
+}
+
+/*
+ * hand free physical cpus over to a cpupool
+ * the free cpus are tried in ascending order until ncpu of them are assigned
+ * returns the number of cpus assigned
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+    int cpu, assigned = 0;
+
+    spin_lock(&cpupool_lock);
+    for_each_cpu_mask(cpu, cpupool_free_cpus)
+    {
+        if ( !cpupool_assign_cpu_locked(c, cpu) )
+            assigned++;
+        if ( assigned == ncpu )
+            break;
+    }
+    spin_unlock(&cpupool_lock);
+
+    printk(XENLOG_DEBUG "cpupool_assign_ncpu(pool=%d,ncpu=%d) rc %d\n",
+           c->cpupool_id, ncpu, assigned);
+
+    return assigned;
+}
+
+static long cpupool_unassign_cpu_helper(void *info) /* second half of cpupool_unassign_cpu(), started via continue_hypercall_on_cpu() */
+{
+ struct cpupool *c = (struct cpupool *)info;
+ int cpu = cpupool_moving_cpu; /* set by cpupool_unassign_cpu() before scheduling this helper */
+ long ret;
+ int cpupool_id = c->cpupool_id;
+
+ ret = cpu_disable_scheduler(cpu, 1);
+ cpu_set(cpu, cpupool_free_cpus); /* done even on failure; the moving state below then still guards the cpu */
+ if ( !ret )
+ {
+ schedule_cpu_switch(cpu, NULL);
+ per_cpu(cpupool, cpu) = NULL;
+ cpupool_moving_cpu = -1;
+ cpupool_cpu_moving = NULL;
+ }
+ spin_unlock(&cpupool_lock); /* lock was taken in cpupool_unassign_cpu() */
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %ld\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * unassign a specific cpu from a cpupool
+ * we must be sure not to run on the cpu to be unassigned! to achieve this
+ * the main functionality is performed via continue_hypercall_on_cpu on a
+ * specific cpu.
+ * if the cpu to be removed is the last one of the cpupool no active domain
+ * must be bound to the cpupool. dying domains are moved to cpupool0 as they
+ * might be zombies.
+ * possible failures:
+ * - last cpu and still active domains in cpupool, or another cpu is moving
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+ int work_cpu;
+ int ret;
+ struct domain *d;
+ int cpupool_id = c->cpupool_id;
+
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d)\n",
+ cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ ret = -EBUSY;
+ if ( (cpupool_moving_cpu != -1) && (cpu != cpupool_moving_cpu) ) /* only one cpu may be in transit at a time */
+ goto out;
+
+ ret = 0;
+ if ( !cpu_isset(cpu, c->cpu_valid) && (cpu != cpupool_moving_cpu) ) /* cpu not in this pool and not a retry: nothing to do */
+ goto out;
+
+ if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) &&
+ (cpu != cpupool_moving_cpu) ) /* last cpu of a pool that still has domains */
+ {
+ for_each_domain(d)
+ {
+ if ( d->cpupool != c )
+ continue;
+ if ( !d->is_dying ) /* a domain that is not dying still needs this pool */
+ {
+ ret = -EBUSY;
+ break;
+ }
+ c->n_dom--;
+ ret = sched_move_domain(d, cpupool0); /* dying domain: hand it over to cpupool0 */
+ if ( ret )
+ {
+ c->n_dom++; /* move failed: restore the domain count */
+ break;
+ }
+ cpupool0->n_dom++;
+ }
+ if ( ret )
+ goto out;
+ }
+ cpupool_moving_cpu = cpu;
+ cpupool_cpu_moving = c;
+ cpu_clear(cpu, c->cpu_valid);
+ work_cpu = smp_processor_id();
+ if ( work_cpu == cpu ) /* the helper must not run on the cpu being removed */
+ {
+ work_cpu = first_cpu(cpupool0->cpu_valid);
+ if ( work_cpu == cpu )
+ work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+ }
+ return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c); /* cpupool_lock stays held; the helper releases it */
+
+out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_unassign_cpu(pool=%d,cpu=%d) ret %d\n",
+ cpupool_id, cpu, ret);
+ return ret;
+}
+
+/*
+ * assign cpus to the default cpupool c (called at boot for cpupool0)
+ * default are all online cpus, fewer cpus may be requested via the boot
+ * parameter pool0_max_cpus; out-of-range values select the default
+ * returns 0 on success, 1 if no cpu could be assigned
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+    /* unset, non-positive or too large: fall back to all online cpus */
+    if ( (cpupool0_max_cpus <= 0) || (cpupool0_max_cpus > num_online_cpus()) )
+        cpupool0_max_cpus = num_online_cpus();
+    /* operate on the pool handed in instead of silently using cpupool0 */
+    return cpupool_assign_ncpu(c, cpupool0_max_cpus) ? 0 : 1;
+}
+
+/*
+ * bind a new domain to the cpupool with the given id
+ * poolid CPUPOOLID_NONE leaves the domain without pool and succeeds
+ * returns 0 on success, 1 else; possible failures:
+ * - pool does not exist
+ * - no cpu assigned to pool
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+    struct cpupool *pool;
+    int n_dom;
+
+    if ( poolid == CPUPOOLID_NONE )
+        return 0;
+
+    spin_lock(&cpupool_lock);
+    pool = cpupool_find_by_id(poolid, 1);
+    if ( (pool == NULL) || !cpus_weight(pool->cpu_valid) )
+    {
+        spin_unlock(&cpupool_lock);
+        return 1;
+    }
+    n_dom = ++pool->n_dom;
+    d->cpupool = pool;
+    spin_unlock(&cpupool_lock);
+
+    printk(XENLOG_DEBUG "cpupool_add_domain(dom=%d,pool=%d) n_dom %d\n",
+           d->domain_id, poolid, n_dom);
+    return 0;
+}
+
+/*
+ * detach a domain from its cpupool (no-op for a domain without pool)
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+    struct cpupool *pool;
+    int pool_id, remaining;
+
+    if ( d->cpupool == NULL )
+        return;
+
+    spin_lock(&cpupool_lock);
+    pool = d->cpupool;
+    pool_id = pool->cpupool_id;
+    remaining = --pool->n_dom;
+    d->cpupool = NULL;
+    spin_unlock(&cpupool_lock);
+    printk(XENLOG_DEBUG "cpupool_rm_domain(dom=%d,pool=%d) n_dom %d\n",
+           d->domain_id, pool_id, remaining);
+}
+
+/*
+ * make a new cpu known to the pool administration
+ * a hotplugged cpu goes to cpupool0 to be able to add it to dom0
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+    if ( cpupool0 != NULL )    /* nothing to do before cpupool0 exists */
+    {
+        spin_lock(&cpupool_lock);
+        cpu_set(cpu, cpupool_free_cpus);
+        cpupool_assign_cpu_locked(cpupool0, cpu);
+        spin_unlock(&cpupool_lock);
+    }
+}
+
+/*
+ * do cpupool related domctl operations; returns 0 or a negative errno value
+ */
+int cpupool_do_domctl(struct xen_domctl_cpupool_op *op)
+{
+ int ret;
+ struct cpupool *c;
+
+ switch ( op->op )
+ {
+
+ case XEN_DOMCTL_CPUPOOL_OP_CREATE:
+ {
+ int poolid;
+ const struct scheduler *sched;
+
+ poolid = (op->cpupool_id == XEN_DOMCTL_CPUPOOL_PAR_ANY) ?
+ CPUPOOLID_NONE: op->cpupool_id; /* PAR_ANY: let cpupool_create() choose the id */
+ sched = scheduler_get_by_id(op->sched_id);
+ ret = -ENOENT;
+ if ( sched == NULL )
+ break;
+ ret = 0;
+ c = cpupool_create(poolid, sched->opt_name);
+ if ( c == NULL )
+ ret = -EINVAL;
+ else
+ op->cpupool_id = c->cpupool_id; /* report the id actually used */
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_DESTROY:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_INFO:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0); /* non-exact lookup: first pool with id >= the requested one */
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ op->cpupool_id = c->cpupool_id;
+ op->sched_id = c->sched.sched_id;
+ op->n_dom = c->n_dom;
+ cpumask_to_xenctl_cpumap(&(op->cpumap), &(c->cpu_valid));
+ ret = 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_ADDCPU:
+ {
+ unsigned cpu;
+
+ cpu = op->cpu;
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d)\n",
+ op->cpupool_id, cpu);
+ spin_lock(&cpupool_lock);
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = first_cpu(cpupool_free_cpus); /* any cpu: take the first free one */
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ goto addcpu_out;
+ ret = -EBUSY;
+ if ( !cpu_isset(cpu, cpupool_free_cpus) ) /* only free cpus can be assigned */
+ goto addcpu_out;
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ ret = -ENOENT;
+ if ( c == NULL )
+ goto addcpu_out;
+ ret = cpupool_assign_cpu_locked(c, cpu);
+addcpu_out:
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool_assign_cpu(pool=%d,cpu=%d) ret %d\n",
+ op->cpupool_id, cpu, ret);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_RMCPU:
+ {
+ unsigned cpu;
+
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ cpu = op->cpu;
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = last_cpu(c->cpu_valid); /* any cpu: take last_cpu() of the pool */
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ break;
+ /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+ * will continue after the local return
+ */
+ ret = cpupool_unassign_cpu(c, cpu);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN:
+ {
+ struct domain *d;
+
+ ret = -EINVAL;
+ if ( op->domid == 0 ) /* domain 0 is never moved */
+ break;
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(op->domid);
+ if ( d == NULL )
+ break;
+ if ( d->cpupool == NULL ) /* domain is not bound to any pool */
+ {
+ ret = -EINVAL;
+ rcu_unlock_domain(d);
+ break;
+ }
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d\n",
+ d->domain_id, op->cpupool_id);
+ ret = -ENOENT;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->cpupool_id, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) ) /* target pool must exist and own at least one cpu */
+ {
+ d->cpupool->n_dom--;
+ ret = sched_move_domain(d, c);
+ if ( ret )
+ d->cpupool->n_dom++; /* move failed: restore the count of the old pool */
+ else
+ c->n_dom++;
+ }
+ spin_unlock(&cpupool_lock);
+ printk(XENLOG_DEBUG "cpupool move_domain(dom=%d)->pool=%d ret %d\n",
+ d->domain_id, op->cpupool_id, ret);
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_FREEINFO:
+ {
+ cpumask_to_xenctl_cpumap(&(op->cpumap),
+ &cpupool_free_cpus); /* report the cpus not in any cpupool */
+ ret = 0;
+ }
+ break;
+
+ default:
+ ret = -ENOSYS;
+
+ }
+
+ return ret;
+}
+
+void schedule_dump(struct cpupool *c);
+
+void dump_runq(unsigned char key)
+{
+    struct cpupool **pool;
+    unsigned long irq_flags;
+    s_time_t stamp = NOW();    /* taken before the lock is acquired */
+
+    spin_lock(&cpupool_lock);
+    local_irq_save(irq_flags);
+
+    printk("sched_smt_power_savings: %s\n",
+           sched_smt_power_savings ? "enabled" : "disabled");
+    printk("NOW=0x%08X%08X\n", (u32)(stamp >> 32), (u32)stamp);
+
+    /* first the dump for "no pool" (NULL), then one per existing cpupool */
+    printk("Idle cpupool:\n");
+    schedule_dump(NULL);
+    for ( pool = &cpupool_list; *pool != NULL; pool = &(*pool)->next )
+    {
+        printk("Cpupool %d:\n", (*pool)->cpupool_id);
+        schedule_dump(*pool);
+    }
+
+    local_irq_restore(irq_flags);
+    spin_unlock(&cpupool_lock);
+}
+
+static int __init cpupool_init(void)
+{
+    cpupool_list = NULL;                   /* no pools exist yet */
+    cpupool_free_cpus = cpu_online_map;    /* every online cpu starts out free */
+    return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff -r fadf63ab49e7 xen/common/domain.c
--- a/xen/common/domain.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/common/domain.c Tue Apr 20 11:10:40 2010 +0200
@@ -150,6 +150,8 @@ struct vcpu *alloc_vcpu(
tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
+ tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
+
if ( is_idle_domain(d) )
{
v->runstate.state = RUNSTATE_running;
@@ -212,7 +214,7 @@ custom_param("extra_guest_irqs", parse_e
custom_param("extra_guest_irqs", parse_extra_guest_irqs);
struct domain *domain_create(
- domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
+ domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref)
{
struct domain *d, **pd;
enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
@@ -291,6 +293,9 @@ struct domain *domain_create(
d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
d->irq_caps = rangeset_new(d, "Interrupts", 0);
if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
+ goto fail;
+
+ if ( cpupool_add_domain(d, poolid) != 0 )
goto fail;
if ( sched_init_domain(d) != 0 )
@@ -600,6 +605,8 @@ static void complete_domain_destroy(stru
arch_domain_destroy(d);
rangeset_domain_destroy(d);
+
+ cpupool_rm_domain(d);
sched_destroy_domain(d);
diff -r fadf63ab49e7 xen/common/domctl.c
--- a/xen/common/domctl.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/common/domctl.c Tue Apr 20 11:10:40 2010 +0200
@@ -11,6 +11,7 @@
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
@@ -140,10 +141,12 @@ void getdomaininfo(struct domain *d, str
info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
BUG_ON(SHARED_M2P(info->shared_info_frame));
+ info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
+
memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
-static unsigned int default_vcpu0_location(void)
+static unsigned int default_vcpu0_location(cpumask_t *online)
{
struct domain *d;
struct vcpu *v;
@@ -173,7 +176,7 @@ static unsigned int default_vcpu0_locati
if ( cpus_weight(per_cpu(cpu_sibling_map, 0)) > 1 )
cpu = next_cpu(cpu, per_cpu(cpu_sibling_map, 0));
cpu_exclude_map = per_cpu(cpu_sibling_map, 0);
- for_each_online_cpu ( i )
+ for_each_cpu_mask(i, *online)
{
if ( cpu_isset(i, cpu_exclude_map) )
continue;
@@ -388,12 +391,14 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
domid_t dom;
static domid_t rover = 0;
unsigned int domcr_flags;
+ int pool = 0;
ret = -EINVAL;
if ( supervisor_mode_kernel ||
(op->u.createdomain.flags &
~(XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap |
- XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off)) )
+ XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off |
+ XEN_DOMCTL_CDF_pool)) )
break;
dom = op->domain;
@@ -429,9 +434,15 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
domcr_flags |= DOMCRF_s3_integrity;
if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_oos_off )
domcr_flags |= DOMCRF_oos_off;
+ if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_pool )
+ pool = op->u.createdomain.cpupool;
+
+ ret = -EINVAL;
+ if ( pool == CPUPOOLID_NONE )
+ break;
ret = -ENOMEM;
- d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref);
+ d = domain_create(dom, pool, domcr_flags, op->u.createdomain.ssidref);
if ( d == NULL )
break;
@@ -450,6 +461,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
{
struct domain *d;
unsigned int i, max = op->u.max_vcpus.max, cpu;
+ cpumask_t *online;
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
@@ -498,6 +510,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
goto maxvcpu_out;
ret = -ENOMEM;
+ online = (d->cpupool == NULL) ? &cpu_online_map : &d->cpupool->cpu_valid;
if ( max > d->max_vcpus )
{
struct vcpu **vcpus;
@@ -521,8 +534,8 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
continue;
cpu = (i == 0) ?
- default_vcpu0_location() :
- cycle_cpu(d->vcpu[i-1]->processor, cpu_online_map);
+ default_vcpu0_location(online) :
+ cycle_cpu(d->vcpu[i-1]->processor, *online);
if ( alloc_vcpu(d, i, cpu) == NULL )
goto maxvcpu_out;
@@ -961,6 +974,14 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
}
break;
+ case XEN_DOMCTL_cpupool_op:
+ {
+ ret = cpupool_do_domctl(&op->u.cpupool_op);
+ if ( (ret == 0) && copy_to_guest(u_domctl, op, 1) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = arch_do_domctl(op, u_domctl);
break;
diff -r fadf63ab49e7 xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/common/sched_credit.c Tue Apr 20 11:10:40 2010 +0200
@@ -70,11 +70,15 @@
/*
* Useful macros
*/
+#define CSCHED_PRIV(_ops) \
+ ((struct csched_private *)((_ops)->sched_data))
#define CSCHED_PCPU(_c) \
((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
+#define CSCHED_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
/*
@@ -160,19 +164,22 @@ struct csched_private {
struct timer master_ticker;
unsigned int master;
cpumask_t idlers;
+ cpumask_t cpus;
uint32_t weight;
uint32_t credit;
int credit_balance;
uint32_t runq_sort;
+ int ticker_active;
};
/*
* Global variables
*/
-static struct csched_private csched_priv;
+static struct csched_private *csched_priv0 = NULL;
static void csched_tick(void *_cpu);
+static void csched_acct(void *dummy);
static inline int
__vcpu_on_runq(struct csched_vcpu *svc)
@@ -238,6 +245,7 @@ __runq_tickle(unsigned int cpu, struct c
{
struct csched_vcpu * const cur =
CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
cpumask_t mask;
ASSERT(cur);
@@ -264,7 +272,7 @@ __runq_tickle(unsigned int cpu, struct c
*/
if ( cur->pri > CSCHED_PRI_IDLE )
{
- if ( cpus_empty(csched_priv.idlers) )
+ if ( cpus_empty(prv->idlers) )
{
CSCHED_STAT_CRANK(tickle_idlers_none);
}
@@ -272,7 +280,7 @@ __runq_tickle(unsigned int cpu, struct c
{
cpumask_t idle_mask;
- cpus_and(idle_mask, csched_priv.idlers, new->vcpu->cpu_affinity);
+ cpus_and(idle_mask, prv->idlers, new->vcpu->cpu_affinity);
if ( !cpus_empty(idle_mask) )
{
CSCHED_STAT_CRANK(tickle_idlers_some);
@@ -294,40 +302,80 @@ __runq_tickle(unsigned int cpu, struct c
cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
}
-static int
-csched_pcpu_init(int cpu)
+static void
+csched_free_pdata(struct scheduler *ops, void *pcpu, int cpu) /* undo the per-cpu accounting in ops' private data and free pcpu */
+{
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_pcpu *spc = pcpu;
+ unsigned long flags;
+
+ if ( spc == NULL ) /* nothing was allocated for this cpu */
+ return;
+
+ spin_lock_irqsave(&prv->lock, flags);
+
+ prv->credit -= CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus--;
+ cpu_clear(cpu, prv->idlers);
+ cpu_clear(cpu, prv->cpus);
+ if ( (prv->master == cpu) && (prv->ncpus > 0) ) /* removed cpu was master: hand over to the first remaining cpu */
+ {
+ prv->master = first_cpu(prv->cpus);
+ migrate_timer(&prv->master_ticker, prv->master);
+ }
+ kill_timer(&spc->ticker);
+ if ( prv->ncpus == 0 ) /* last cpu gone: the master ticker is no longer needed */
+ kill_timer(&prv->master_ticker);
+
+ spin_unlock_irqrestore(&prv->lock, flags);
+
+ xfree(spc);
+}
+
+static void *
+csched_alloc_pdata(struct scheduler *ops, int cpu)
{
struct csched_pcpu *spc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
/* Allocate per-PCPU info */
spc = xmalloc(struct csched_pcpu);
if ( spc == NULL )
- return -1;
+ return NULL;
memset(spc, 0, sizeof(*spc));
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
/* Initialize/update system-wide config */
- csched_priv.credit += CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.ncpus <= cpu )
- csched_priv.ncpus = cpu + 1;
- if ( csched_priv.master >= csched_priv.ncpus )
- csched_priv.master = cpu;
+ prv->credit += CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus++;
+ cpu_set(cpu, prv->cpus);
+ if ( (prv->ncpus == 1) && (prv != csched_priv0) )
+ {
+ prv->master = cpu;
+ init_timer( &prv->master_ticker, csched_acct, prv, cpu);
+ prv->ticker_active = 2;
+ }
init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
+
+ if ( prv == csched_priv0 )
+ prv->master = first_cpu(prv->cpus);
+
INIT_LIST_HEAD(&spc->runq);
- spc->runq_sort_last = csched_priv.runq_sort;
+ spc->runq_sort_last = prv->runq_sort;
spc->idle_bias = NR_CPUS - 1;
- per_cpu(schedule_data, cpu).sched_priv = spc;
+ if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
+ per_cpu(schedule_data, cpu).sched_priv = spc;
/* Start off idling... */
BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
- cpu_set(cpu, csched_priv.idlers);
+ cpu_set(cpu, prv->idlers);
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
- return 0;
+ return spc;
}
#ifndef NDEBUG
@@ -400,17 +448,19 @@ __csched_vcpu_is_migrateable(struct vcpu
}
static int
-_csched_cpu_pick(struct vcpu *vc, bool_t commit)
+_csched_cpu_pick(struct scheduler *ops, struct vcpu *vc, bool_t commit)
{
cpumask_t cpus;
cpumask_t idlers;
+ cpumask_t *online;
int cpu;
/*
* Pick from online CPUs in VCPU's affinity mask, giving a
* preference to its current processor if it's in there.
*/
- cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+ online = CSCHED_CPUONLINE(vc->domain->cpupool);
+ cpus_and(cpus, *online, vc->cpu_affinity);
cpu = cpu_isset(vc->processor, cpus)
? vc->processor
: cycle_cpu(vc->processor, cpus);
@@ -428,7 +478,7 @@ _csched_cpu_pick(struct vcpu *vc, bool_t
* like run two VCPUs on co-hyperthreads while there are idle cores
* or sockets.
*/
- cpus_and(idlers, cpu_online_map, csched_priv.idlers);
+ cpus_and(idlers, cpu_online_map, CSCHED_PRIV(ops)->idlers);
cpu_set(cpu, idlers);
cpus_and(cpus, cpus, idlers);
cpu_clear(cpu, cpus);
@@ -474,18 +524,18 @@ _csched_cpu_pick(struct vcpu *vc, bool_t
}
static int
-csched_cpu_pick(struct vcpu *vc)
+csched_cpu_pick(struct scheduler *ops, struct vcpu *vc)
{
- return _csched_cpu_pick(vc, 1);
+ return _csched_cpu_pick(ops, vc, 1);
}
static inline void
-__csched_vcpu_acct_start(struct csched_vcpu *svc)
+__csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
unsigned long flags;
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
if ( list_empty(&svc->active_vcpu_elem) )
{
@@ -496,16 +546,17 @@ __csched_vcpu_acct_start(struct csched_v
list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
if ( list_empty(&sdom->active_sdom_elem) )
{
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
- csched_priv.weight += sdom->weight;
+ list_add(&sdom->active_sdom_elem, &prv->active_sdom);
+ prv->weight += sdom->weight;
}
}
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
static inline void
-__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+__csched_vcpu_acct_stop_locked(struct csched_private *prv,
+ struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
@@ -518,16 +569,17 @@ __csched_vcpu_acct_stop_locked(struct cs
list_del_init(&svc->active_vcpu_elem);
if ( list_empty(&sdom->active_vcpu) )
{
- BUG_ON( csched_priv.weight < sdom->weight );
+ BUG_ON( prv->weight < sdom->weight );
list_del_init(&sdom->active_sdom_elem);
- csched_priv.weight -= sdom->weight;
+ prv->weight -= sdom->weight;
}
}
static void
-csched_vcpu_acct(unsigned int cpu)
+csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
{
struct csched_vcpu * const svc = CSCHED_VCPU(current);
+ struct scheduler *ops = per_cpu(scheduler, cpu);
ASSERT( current->processor == cpu );
ASSERT( svc->sdom != NULL );
@@ -556,9 +608,9 @@ csched_vcpu_acct(unsigned int cpu)
*/
if ( list_empty(&svc->active_vcpu_elem) )
{
- __csched_vcpu_acct_start(svc);
+ __csched_vcpu_acct_start(prv, svc);
}
- else if ( _csched_cpu_pick(current, 0) != cpu )
+ else if ( _csched_cpu_pick(ops, current, 0) != cpu )
{
CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
CSCHED_STAT_CRANK(migrate_running);
@@ -567,66 +619,75 @@ csched_vcpu_acct(unsigned int cpu)
}
}
-static int
-csched_vcpu_init(struct vcpu *vc)
+static void *
+csched_alloc_vdata(struct scheduler *ops, struct vcpu *vc, void *dd)
{
- struct domain * const dom = vc->domain;
- struct csched_dom *sdom = CSCHED_DOM(dom);
struct csched_vcpu *svc;
-
- CSCHED_STAT_CRANK(vcpu_init);
/* Allocate per-VCPU info */
svc = xmalloc(struct csched_vcpu);
if ( svc == NULL )
- return -1;
+ return NULL;
memset(svc, 0, sizeof(*svc));
INIT_LIST_HEAD(&svc->runq_elem);
INIT_LIST_HEAD(&svc->active_vcpu_elem);
- svc->sdom = sdom;
+ svc->sdom = dd;
svc->vcpu = vc;
atomic_set(&svc->credit, 0);
svc->flags = 0U;
- svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
+ svc->pri = is_idle_domain(vc->domain) ?
+ CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
CSCHED_VCPU_STATS_RESET(svc);
- vc->sched_priv = svc;
-
- /* Allocate per-PCPU info */
- if ( unlikely(!CSCHED_PCPU(vc->processor)) )
- {
- if ( csched_pcpu_init(vc->processor) != 0 )
- return -1;
- }
-
- CSCHED_VCPU_CHECK(vc);
- return 0;
+ CSCHED_STAT_CRANK(vcpu_init);
+ return svc;
}
static void
-csched_vcpu_destroy(struct vcpu *vc)
+csched_vcpu_insert(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu *svc = vc->sched_priv;
+
+ if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
+ __runq_insert(vc->processor, svc);
+}
+
+static void
+csched_free_vdata(struct scheduler *ops, void *priv)
+{
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_vcpu *svc = priv;
+ unsigned long flags;
+
+ if ( __vcpu_on_runq(svc) )
+ __runq_remove(svc);
+
+ spin_lock_irqsave(&(prv->lock), flags);
+
+ if ( !list_empty(&svc->active_vcpu_elem) )
+ __csched_vcpu_acct_stop_locked(prv, svc);
+
+ spin_unlock_irqrestore(&(prv->lock), flags);
+
+ xfree(svc);
+}
+
+static void
+csched_vcpu_destroy(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
struct csched_dom * const sdom = svc->sdom;
- unsigned long flags;
CSCHED_STAT_CRANK(vcpu_destroy);
BUG_ON( sdom == NULL );
BUG_ON( !list_empty(&svc->runq_elem) );
- spin_lock_irqsave(&csched_priv.lock, flags);
-
- if ( !list_empty(&svc->active_vcpu_elem) )
- __csched_vcpu_acct_stop_locked(svc);
-
- spin_unlock_irqrestore(&csched_priv.lock, flags);
-
- xfree(svc);
+ csched_free_vdata(ops, svc);
}
static void
-csched_vcpu_sleep(struct vcpu *vc)
+csched_vcpu_sleep(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -641,7 +702,7 @@ csched_vcpu_sleep(struct vcpu *vc)
}
static void
-csched_vcpu_wake(struct vcpu *vc)
+csched_vcpu_wake(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
const unsigned int cpu = vc->processor;
@@ -697,10 +758,12 @@ csched_vcpu_wake(struct vcpu *vc)
static int
csched_dom_cntl(
+ struct scheduler *ops,
struct domain *d,
struct xen_domctl_scheduler_op *op)
{
struct csched_dom * const sdom = CSCHED_DOM(d);
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
@@ -712,14 +775,14 @@ csched_dom_cntl(
{
ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
if ( op->u.credit.weight != 0 )
{
if ( !list_empty(&sdom->active_sdom_elem) )
{
- csched_priv.weight -= sdom->weight;
- csched_priv.weight += op->u.credit.weight;
+ prv->weight -= sdom->weight;
+ prv->weight += op->u.credit.weight;
}
sdom->weight = op->u.credit.weight;
}
@@ -727,25 +790,20 @@ csched_dom_cntl(
if ( op->u.credit.cap != (uint16_t)~0U )
sdom->cap = op->u.credit.cap;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
return 0;
}
-static int
-csched_dom_init(struct domain *dom)
+static void *
+csched_alloc_domdata(struct scheduler *ops, struct domain *dom)
{
struct csched_dom *sdom;
- CSCHED_STAT_CRANK(dom_init);
-
- if ( is_idle_domain(dom) )
- return 0;
-
sdom = xmalloc(struct csched_dom);
if ( sdom == NULL )
- return -ENOMEM;
+ return NULL;
memset(sdom, 0, sizeof(*sdom));
/* Initialize credit and weight */
@@ -755,16 +813,40 @@ csched_dom_init(struct domain *dom)
sdom->dom = dom;
sdom->weight = CSCHED_DEFAULT_WEIGHT;
sdom->cap = 0U;
+
+ return (void *)sdom;
+}
+
+static int
+csched_dom_init(struct scheduler *ops, struct domain *dom)
+{
+ struct csched_dom *sdom;
+
+ CSCHED_STAT_CRANK(dom_init);
+
+ if ( is_idle_domain(dom) )
+ return 0;
+
+ sdom = csched_alloc_domdata(ops, dom);
+ if ( sdom == NULL )
+ return -ENOMEM;
+
dom->sched_priv = sdom;
return 0;
}
static void
-csched_dom_destroy(struct domain *dom)
+csched_free_domdata(struct scheduler *ops, void *data)
+{
+ xfree(data);
+}
+
+static void
+csched_dom_destroy(struct scheduler *ops, struct domain *dom)
{
CSCHED_STAT_CRANK(dom_destroy);
- xfree(CSCHED_DOM(dom));
+ csched_free_domdata(ops, CSCHED_DOM(dom));
}
/*
@@ -775,7 +857,7 @@ csched_dom_destroy(struct domain *dom)
* remember the last UNDER to make the move up operation O(1).
*/
static void
-csched_runq_sort(unsigned int cpu)
+csched_runq_sort(struct csched_private *prv, unsigned int cpu)
{
struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
struct list_head *runq, *elem, *next, *last_under;
@@ -783,7 +865,7 @@ csched_runq_sort(unsigned int cpu)
unsigned long flags;
int sort_epoch;
- sort_epoch = csched_priv.runq_sort;
+ sort_epoch = prv->runq_sort;
if ( sort_epoch == spc->runq_sort_last )
return;
@@ -820,6 +902,7 @@ static void
static void
csched_acct(void* dummy)
{
+ struct csched_private *prv = dummy;
unsigned long flags;
struct list_head *iter_vcpu, *next_vcpu;
struct list_head *iter_sdom, *next_sdom;
@@ -836,22 +919,22 @@ csched_acct(void* dummy)
int credit;
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
- weight_total = csched_priv.weight;
- credit_total = csched_priv.credit;
+ weight_total = prv->weight;
+ credit_total = prv->credit;
/* Converge balance towards 0 when it drops negative */
- if ( csched_priv.credit_balance < 0 )
+ if ( prv->credit_balance < 0 )
{
- credit_total -= csched_priv.credit_balance;
+ credit_total -= prv->credit_balance;
CSCHED_STAT_CRANK(acct_balance);
}
if ( unlikely(weight_total == 0) )
{
- csched_priv.credit_balance = 0;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ prv->credit_balance = 0;
+ spin_unlock_irqrestore(&prv->lock, flags);
CSCHED_STAT_CRANK(acct_no_work);
goto out;
}
@@ -863,7 +946,7 @@ csched_acct(void* dummy)
credit_xtra = 0;
credit_cap = 0U;
- list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+ list_for_each_safe( iter_sdom, next_sdom, &prv->active_sdom )
{
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -883,9 +966,9 @@ csched_acct(void* dummy)
* only when the system-wide credit balance is negative.
*/
credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.credit_balance < 0 )
+ if ( prv->credit_balance < 0 )
{
- credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+ credit_peak += ( ( -prv->credit_balance * sdom->weight) +
(weight_total - 1)
) / weight_total;
}
@@ -927,7 +1010,7 @@ csched_acct(void* dummy)
*/
CSCHED_STAT_CRANK(acct_reorder);
list_del(&sdom->active_sdom_elem);
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ list_add(&sdom->active_sdom_elem, &prv->active_sdom);
}
credit_fair = credit_peak;
@@ -993,7 +1076,7 @@ csched_acct(void* dummy)
/* Upper bound on credits means VCPU stops earning */
if ( credit > CSCHED_CREDITS_PER_TSLICE )
{
- __csched_vcpu_acct_stop_locked(svc);
+ __csched_vcpu_acct_stop_locked(prv, svc);
credit = 0;
atomic_set(&svc->credit, credit);
}
@@ -1005,15 +1088,15 @@ csched_acct(void* dummy)
}
}
- csched_priv.credit_balance = credit_balance;
+ prv->credit_balance = credit_balance;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
/* Inform each CPU that its runq needs to be sorted */
- csched_priv.runq_sort++;
+ prv->runq_sort++;
out:
- set_timer( &csched_priv.master_ticker, NOW() +
+ set_timer( &prv->master_ticker, NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
}
@@ -1022,6 +1105,7 @@ csched_tick(void *_cpu)
{
unsigned int cpu = (unsigned long)_cpu;
struct csched_pcpu *spc = CSCHED_PCPU(cpu);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
spc->tick++;
@@ -1029,7 +1113,7 @@ csched_tick(void *_cpu)
* Accounting for running VCPU
*/
if ( !is_idle_vcpu(current) )
- csched_vcpu_acct(cpu);
+ csched_vcpu_acct(prv, cpu);
/*
* Check if runq needs to be sorted
@@ -1038,7 +1122,7 @@ csched_tick(void *_cpu)
* modified priorities. This is a special O(n) sort and runs at most
* once per accounting period (currently 30 milliseconds).
*/
- csched_runq_sort(cpu);
+ csched_runq_sort(prv, cpu);
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
@@ -1091,16 +1175,19 @@ csched_runq_steal(int peer_cpu, int cpu,
}
static struct csched_vcpu *
-csched_load_balance(int cpu, struct csched_vcpu *snext)
+csched_load_balance(struct csched_private *prv, int cpu,
+ struct csched_vcpu *snext)
{
struct csched_vcpu *speer;
cpumask_t workers;
+ cpumask_t *online;
int peer_cpu;
BUG_ON( cpu != snext->vcpu->processor );
+ online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
/* If this CPU is going offline we shouldn't steal work. */
- if ( unlikely(!cpu_online(cpu)) )
+ if ( unlikely(!cpu_isset(cpu, *online)) )
goto out;
if ( snext->pri == CSCHED_PRI_IDLE )
@@ -1114,7 +1201,7 @@ csched_load_balance(int cpu, struct csch
* Peek at non-idling CPUs in the system, starting with our
* immediate neighbour.
*/
- cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
+ cpus_andnot(workers, *online, prv->idlers);
cpu_clear(cpu, workers);
peer_cpu = cpu;
@@ -1156,11 +1243,12 @@ csched_load_balance(int cpu, struct csch
* fast for the common case.
*/
static struct task_slice
-csched_schedule(s_time_t now)
+csched_schedule(struct scheduler *ops, s_time_t now)
{
const int cpu = smp_processor_id();
struct list_head * const runq = RUNQ(cpu);
struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+ struct csched_private *prv = CSCHED_PRIV(ops);
struct csched_vcpu *snext;
struct task_slice ret;
@@ -1207,7 +1295,7 @@ csched_schedule(s_time_t now)
if ( snext->pri > CSCHED_PRI_TS_OVER )
__runq_remove(snext);
else
- snext = csched_load_balance(cpu, snext);
+ snext = csched_load_balance(prv, cpu, snext);
/*
* Update idlers mask if necessary. When we're idling, other CPUs
@@ -1215,12 +1303,12 @@ csched_schedule(s_time_t now)
*/
if ( snext->pri == CSCHED_PRI_IDLE )
{
- if ( !cpu_isset(cpu, csched_priv.idlers) )
- cpu_set(cpu, csched_priv.idlers);
+ if ( !cpu_isset(cpu, prv->idlers) )
+ cpu_set(cpu, prv->idlers);
}
- else if ( cpu_isset(cpu, csched_priv.idlers) )
+ else if ( cpu_isset(cpu, prv->idlers) )
{
- cpu_clear(cpu, csched_priv.idlers);
+ cpu_clear(cpu, prv->idlers);
}
if ( !is_idle_vcpu(snext->vcpu) )
@@ -1267,7 +1355,7 @@ csched_dump_vcpu(struct csched_vcpu *svc
}
static void
-csched_dump_pcpu(int cpu)
+csched_dump_pcpu(struct scheduler *ops, int cpu)
{
struct list_head *runq, *iter;
struct csched_pcpu *spc;
@@ -1305,9 +1393,10 @@ csched_dump_pcpu(int cpu)
}
static void
-csched_dump(void)
+csched_dump(struct scheduler *ops)
{
struct list_head *iter_sdom, *iter_svc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
int loop;
#define idlers_buf keyhandler_scratch
@@ -1324,12 +1413,12 @@ csched_dump(void)
"\tticks per tslice = %d\n"
"\tticks per acct = %d\n"
"\tmigration delay = %uus\n",
- csched_priv.ncpus,
- csched_priv.master,
- csched_priv.credit,
- csched_priv.credit_balance,
- csched_priv.weight,
- csched_priv.runq_sort,
+ prv->ncpus,
+ prv->master,
+ prv->credit,
+ prv->credit_balance,
+ prv->weight,
+ prv->runq_sort,
CSCHED_DEFAULT_WEIGHT,
CSCHED_MSECS_PER_TICK,
CSCHED_CREDITS_PER_MSEC,
@@ -1337,12 +1426,12 @@ csched_dump(void)
CSCHED_TICKS_PER_ACCT,
vcpu_migration_delay);
- cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
+ cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
printk("idlers: %s\n", idlers_buf);
printk("active vcpus:\n");
loop = 0;
- list_for_each( iter_sdom, &csched_priv.active_sdom )
+ list_for_each( iter_sdom, &prv->active_sdom )
{
struct csched_dom *sdom;
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -1359,18 +1448,30 @@ csched_dump(void)
#undef idlers_buf
}
-static void
-csched_init(void)
+static int
+csched_init(struct scheduler *ops, int pool0)
{
- spin_lock_init(&csched_priv.lock);
- INIT_LIST_HEAD(&csched_priv.active_sdom);
- csched_priv.ncpus = 0;
- csched_priv.master = UINT_MAX;
- cpus_clear(csched_priv.idlers);
- csched_priv.weight = 0U;
- csched_priv.credit = 0U;
- csched_priv.credit_balance = 0;
- csched_priv.runq_sort = 0U;
+ struct csched_private *prv;
+
+ prv = xmalloc(struct csched_private);
+ if ( prv == NULL )
+ return 1;
+ memset(prv, 0, sizeof(*prv));
+ if ( pool0 )
+ csched_priv0 = prv;
+ ops->sched_data = prv;
+ spin_lock_init(&prv->lock);
+ INIT_LIST_HEAD(&prv->active_sdom);
+ prv->ncpus = 0;
+ prv->master = UINT_MAX;
+ cpus_clear(prv->idlers);
+ prv->weight = 0U;
+ prv->credit = 0U;
+ prv->credit_balance = 0;
+ prv->runq_sort = 0U;
+ prv->ticker_active = (csched_priv0 == prv) ? 0 : 1;
+
+ return 0;
}
/* Tickers cannot be kicked until SMP subsystem is alive. */
@@ -1380,8 +1481,10 @@ static __init int csched_start_tickers(v
unsigned int cpu;
/* Is the credit scheduler initialised? */
- if ( csched_priv.ncpus == 0 )
+ if ( (csched_priv0 == NULL) || (csched_priv0->ncpus == 0) )
return 0;
+
+ csched_priv0->ticker_active = 1;
for_each_online_cpu ( cpu )
{
@@ -1389,45 +1492,72 @@ static __init int csched_start_tickers(v
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
- init_timer( &csched_priv.master_ticker, csched_acct, NULL,
- csched_priv.master);
+ init_timer( &csched_priv0->master_ticker, csched_acct, csched_priv0,
+ csched_priv0->master);
- set_timer( &csched_priv.master_ticker, NOW() +
+ set_timer( &csched_priv0->master_ticker, NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
return 0;
}
__initcall(csched_start_tickers);
-static void csched_tick_suspend(void)
+static void
+csched_deinit(struct scheduler *ops)
+{
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
+}
+
+static void csched_tick_suspend(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
- spc = CSCHED_PCPU(smp_processor_id());
+ spc = CSCHED_PCPU(cpu);
stop_timer(&spc->ticker);
}
-static void csched_tick_resume(void)
+static void csched_tick_resume(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
uint64_t now = NOW();
+ struct csched_private *prv;
- spc = CSCHED_PCPU(smp_processor_id());
+ prv = CSCHED_PRIV(ops);
+ if ( !prv->ticker_active )
+ return;
+
+ /* Re-arm this CPU's ticker on the next tick boundary. */
+ spc = CSCHED_PCPU(cpu);
set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
- now % MILLISECS(CSCHED_MSECS_PER_TICK) );
+
+ if ( (prv->ticker_active == 2) && (prv->master == cpu) )
+ {
+ set_timer( &prv->master_ticker, now +
+ MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT -
+ now % (MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT));
+ prv->ticker_active = 1;
+ }
}
-const struct scheduler sched_credit_def = {
+static struct csched_private _csched_priv;
+
+struct scheduler sched_credit_def = {
.name = "SMP Credit Scheduler",
.opt_name = "credit",
.sched_id = XEN_SCHEDULER_CREDIT,
+ .sched_data = &_csched_priv,
.init_domain = csched_dom_init,
.destroy_domain = csched_dom_destroy,
- .init_vcpu = csched_vcpu_init,
+ .insert_vcpu = csched_vcpu_insert,
.destroy_vcpu = csched_vcpu_destroy,
.sleep = csched_vcpu_sleep,
@@ -1441,6 +1571,13 @@ const struct scheduler sched_credit_def
.dump_cpu_state = csched_dump_pcpu,
.dump_settings = csched_dump,
.init = csched_init,
+ .deinit = csched_deinit,
+ .alloc_vdata = csched_alloc_vdata,
+ .free_vdata = csched_free_vdata,
+ .alloc_pdata = csched_alloc_pdata,
+ .free_pdata = csched_free_pdata,
+ .alloc_domdata = csched_alloc_domdata,
+ .free_domdata = csched_free_domdata,
.tick_suspend = csched_tick_suspend,
.tick_resume = csched_tick_resume,
diff -r fadf63ab49e7 xen/common/sched_credit2.c
--- a/xen/common/sched_credit2.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/common/sched_credit2.c Tue Apr 20 11:10:40 2010 +0200
@@ -149,12 +149,16 @@
/*
* Useful macros
*/
+#define CSCHED_PRIV(_ops) \
+ ((struct csched_private *)((_ops)->sched_data))
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
+#define CSCHED_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
/* CPU to runq_id macro */
-#define c2r(_cpu) (csched_priv.runq_map[(_cpu)])
+#define c2r(_ops, _cpu) (CSCHED_PRIV(_ops)->runq_map[(_cpu)])
/* CPU to runqueue struct macro */
-#define RQD(_cpu) (&csched_priv.rqd[c2r(_cpu)])
+#define RQD(_ops, _cpu) (&CSCHED_PRIV(_ops)->rqd[c2r(_ops, _cpu)])
/*
* Per-runqueue data
@@ -212,11 +216,6 @@ struct csched_dom {
uint16_t nr_vcpus;
};
-
-/*
- * Global variables
- */
-static struct csched_private csched_priv;
/*
* Time-to-credit, credit-to-time.
@@ -284,15 +283,15 @@ __runq_insert(struct list_head *runq, st
}
static void
-runq_insert(unsigned int cpu, struct csched_vcpu *svc)
+runq_insert(struct scheduler *ops, unsigned int cpu, struct csched_vcpu *svc)
{
- struct list_head * runq = &RQD(cpu)->runq;
+ struct list_head * runq = &RQD(ops, cpu)->runq;
int pos = 0;
ASSERT( spin_is_locked(per_cpu(schedule_data, cpu).schedule_lock) );
BUG_ON( __vcpu_on_runq(svc) );
- BUG_ON( c2r(cpu) != c2r(svc->vcpu->processor) );
+ BUG_ON( c2r(ops, cpu) != c2r(ops, svc->vcpu->processor) );
pos = __runq_insert(runq, svc);
@@ -324,11 +323,12 @@ void burn_credits(struct csched_runqueue
/* Check to see if the item on the runqueue is higher priority than what's
* currently running; if so, wake up the processor */
static /*inline*/ void
-runq_tickle(unsigned int cpu, struct csched_vcpu *new, s_time_t now)
+runq_tickle(struct scheduler *ops, unsigned int cpu, struct csched_vcpu *new,
s_time_t now)
{
int i, ipid=-1;
s_time_t lowest=(1<<30);
- struct csched_runqueue_data *rqd = RQD(cpu);
+ struct csched_runqueue_data *rqd = RQD(ops, cpu);
+ cpumask_t *online;
d2printk("rqt d%dv%d cd%dv%d\n",
new->vcpu->domain->domain_id,
@@ -336,13 +336,14 @@ runq_tickle(unsigned int cpu, struct csc
current->domain->domain_id,
current->vcpu_id);
+ online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
/* Find the cpu in this queue group that has the lowest credits */
for ( i=rqd->cpu_min ; i < rqd->cpu_max ; i++ )
{
struct csched_vcpu * cur;
/* Skip cpus that aren't online */
- if ( !cpu_online(i) )
+ if ( !cpu_isset(i, *online) )
continue;
cur = CSCHED_VCPU(per_cpu(schedule_data, i).curr);
@@ -396,11 +397,11 @@ runq_tickle(unsigned int cpu, struct csc
/*
* Credit-related code
*/
-static void reset_credit(int cpu, s_time_t now)
+static void reset_credit(struct scheduler *ops, int cpu, s_time_t now)
{
struct list_head *iter;
- list_for_each( iter, &RQD(cpu)->svc )
+ list_for_each( iter, &RQD(ops, cpu)->svc )
{
struct csched_vcpu * svc = list_entry(iter, struct csched_vcpu,
rqd_elem);
@@ -521,64 +522,100 @@ __csched_vcpu_check(struct vcpu *vc)
#define CSCHED_VCPU_CHECK(_vc)
#endif
-static int
-csched_vcpu_init(struct vcpu *vc)
+static void *
+csched_alloc_vdata(struct scheduler *ops, struct vcpu *vc, void *dd)
{
- struct domain * const dom = vc->domain;
- struct csched_dom *sdom = CSCHED_DOM(dom);
struct csched_vcpu *svc;
-
- printk("%s: Initializing d%dv%d\n",
- __func__, dom->domain_id, vc->vcpu_id);
/* Allocate per-VCPU info */
svc = xmalloc(struct csched_vcpu);
if ( svc == NULL )
- return -1;
+ return NULL;
+ memset(svc, 0, sizeof(*svc));
INIT_LIST_HEAD(&svc->rqd_elem);
INIT_LIST_HEAD(&svc->sdom_elem);
INIT_LIST_HEAD(&svc->runq_elem);
- svc->sdom = sdom;
+ svc->sdom = dd;
svc->vcpu = vc;
svc->flags = 0U;
- vc->sched_priv = svc;
if ( ! is_idle_vcpu(vc) )
{
- BUG_ON( sdom == NULL );
+ BUG_ON( svc->sdom == NULL );
svc->credit = CSCHED_CREDIT_INIT;
- svc->weight = sdom->weight;
+ svc->weight = svc->sdom->weight;
+ }
+ else
+ {
+ BUG_ON( svc->sdom != NULL );
+ svc->credit = CSCHED_IDLE_CREDIT;
+ svc->weight = 0;
+ }
+ return svc;
+}
+
+static void
+csched_vcpu_insert(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu *svc = vc->sched_priv;
+ struct domain * const dom = vc->domain;
+ struct csched_dom *sdom = CSCHED_DOM(dom);
+
+ printk("%s: Inserting d%dv%d\n",
+ __func__, dom->domain_id, vc->vcpu_id);
+
+ if ( ! is_idle_vcpu(vc) )
+ {
/* FIXME: Do we need the private lock here? */
- list_add_tail(&svc->sdom_elem, &sdom->vcpu);
+ list_add_tail(&svc->sdom_elem, &svc->sdom->vcpu);
/* Add vcpu to runqueue of initial processor */
/* FIXME: Abstract for multiple runqueues */
vcpu_schedule_lock_irq(vc);
- list_add_tail(&svc->rqd_elem, &RQD(vc->processor)->svc);
- update_max_weight(RQD(vc->processor), svc->weight, 0);
+ list_add_tail(&svc->rqd_elem, &RQD(ops, vc->processor)->svc);
+ update_max_weight(RQD(ops, vc->processor), svc->weight, 0);
vcpu_schedule_unlock_irq(vc);
sdom->nr_vcpus++;
}
- else
- {
- BUG_ON( sdom != NULL );
- svc->credit = CSCHED_IDLE_CREDIT;
- svc->weight = 0;
- }
CSCHED_VCPU_CHECK(vc);
- return 0;
}
static void
-csched_vcpu_destroy(struct vcpu *vc)
+csched_free_vdata(struct scheduler *ops, void *priv)
+{
+ struct csched_vcpu *svc = priv;
+ struct vcpu *vc = svc->vcpu;
+
+ if ( ! is_idle_vcpu(vc) )
+ {
+ /* Remove from runqueue */
+ vcpu_schedule_lock_irq(vc);
+
+ list_del_init(&svc->rqd_elem);
+ update_max_weight(RQD(ops, vc->processor), 0, svc->weight);
+
+ vcpu_schedule_unlock_irq(vc);
+
+ /* Remove from sdom list. Don't need a lock for this, as it's called
+ * synchronously when nothing else can happen. */
+ list_del_init(&svc->sdom_elem);
+
+ svc->sdom->nr_vcpus--;
+ }
+
+ xfree(svc);
+}
+
+static void
+csched_vcpu_destroy(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
struct csched_dom * const sdom = svc->sdom;
@@ -586,25 +623,11 @@ csched_vcpu_destroy(struct vcpu *vc)
BUG_ON( sdom == NULL );
BUG_ON( !list_empty(&svc->runq_elem) );
- /* Remove from runqueue */
- vcpu_schedule_lock_irq(vc);
-
- list_del_init(&svc->rqd_elem);
- update_max_weight(RQD(vc->processor), 0, svc->weight);
-
- vcpu_schedule_unlock_irq(vc);
-
- /* Remove from sdom list. Don't need a lock for this, as it's called
- * syncronously when nothing else can happen. */
- list_del_init(&svc->sdom_elem);
-
- sdom->nr_vcpus--;
-
- xfree(svc);
+ csched_free_vdata(ops, svc);
}
static void
-csched_vcpu_sleep(struct vcpu *vc)
+csched_vcpu_sleep(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -617,7 +640,7 @@ csched_vcpu_sleep(struct vcpu *vc)
}
static void
-csched_vcpu_wake(struct vcpu *vc)
+csched_vcpu_wake(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
const unsigned int cpu = vc->processor;
@@ -654,8 +677,8 @@ csched_vcpu_wake(struct vcpu *vc)
now = NOW();
/* Put the VCPU on the runq */
- runq_insert(cpu, svc);
- runq_tickle(cpu, svc, now);
+ runq_insert(ops, cpu, svc);
+ runq_tickle(ops, cpu, svc, now);
out:
d2printk("w-\n");
@@ -663,7 +686,7 @@ out:
}
static void
-csched_context_saved(struct vcpu *vc)
+csched_context_saved(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -688,15 +711,15 @@ csched_context_saved(struct vcpu *vc)
BUG_ON(__vcpu_on_runq(svc));
- runq_insert(cpu, svc);
- runq_tickle(cpu, svc, NOW());
+ runq_insert(ops, cpu, svc);
+ runq_tickle(ops, cpu, svc, NOW());
}
vcpu_schedule_unlock_irq(vc);
}
static int
-csched_cpu_pick(struct vcpu *vc)
+csched_cpu_pick(struct scheduler *ops, struct vcpu *vc)
{
/* FIXME: Chose a schedule group based on load */
/* FIXME: Migrate the vcpu to the new runqueue list, updating
@@ -706,10 +729,12 @@ csched_cpu_pick(struct vcpu *vc)
static int
csched_dom_cntl(
+ struct scheduler *ops,
struct domain *d,
struct xen_domctl_scheduler_op *op)
{
struct csched_dom * const sdom = CSCHED_DOM(d);
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
@@ -727,7 +752,7 @@ csched_dom_cntl(
/* Must hold csched_priv lock to update sdom, runq lock to
* update csvcs. */
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&prv->lock, flags);
old_weight = sdom->weight;
@@ -744,32 +769,28 @@ csched_dom_cntl(
vcpu_schedule_lock_irq(svc->vcpu);
svc->weight = sdom->weight;
- update_max_weight(RQD(svc->vcpu->processor), svc->weight,
old_weight);
+ update_max_weight(RQD(ops, svc->vcpu->processor), svc->weight,
old_weight);
vcpu_schedule_unlock_irq(svc->vcpu);
}
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&prv->lock, flags);
}
}
return 0;
}
-static int
-csched_dom_init(struct domain *dom)
+static void *
+csched_alloc_domdata(struct scheduler *ops, struct domain *dom)
{
struct csched_dom *sdom;
int flags;
- printk("%s: Initializing domain %d\n", __func__, dom->domain_id);
-
- if ( is_idle_domain(dom) )
- return 0;
-
sdom = xmalloc(struct csched_dom);
if ( sdom == NULL )
- return -ENOMEM;
+ return NULL;
+ memset(sdom, 0, sizeof(*sdom));
/* Initialize credit and weight */
INIT_LIST_HEAD(&sdom->vcpu);
@@ -778,40 +799,65 @@ csched_dom_init(struct domain *dom)
sdom->weight = CSCHED_DEFAULT_WEIGHT;
sdom->nr_vcpus = 0;
+ spin_lock_irqsave(&CSCHED_PRIV(ops)->lock, flags);
+
+ list_add_tail(&sdom->sdom_elem, &CSCHED_PRIV(ops)->sdom);
+
+ spin_unlock_irqrestore(&CSCHED_PRIV(ops)->lock, flags);
+
+ return (void *)sdom;
+}
+
+static int
+csched_dom_init(struct scheduler *ops, struct domain *dom)
+{
+ struct csched_dom *sdom;
+
+ printk("%s: Initializing domain %d\n", __func__, dom->domain_id);
+
+ if ( is_idle_domain(dom) )
+ return 0;
+
+ sdom = csched_alloc_domdata(ops, dom);
+ if ( sdom == NULL )
+ return -ENOMEM;
+
dom->sched_priv = sdom;
-
- spin_lock_irqsave(&csched_priv.lock, flags);
-
- list_add_tail(&sdom->sdom_elem, &csched_priv.sdom);
-
- spin_unlock_irqrestore(&csched_priv.lock, flags);
return 0;
}
static void
-csched_dom_destroy(struct domain *dom)
+csched_free_domdata(struct scheduler *ops, void *data)
+{
+ /* 'data' is the csched_dom to unlink from the scheduler's list and free. */
+ unsigned long flags; /* unsigned long, as csched_dom_cntl() uses for irqsave */
+ struct csched_dom *sdom = data;
+
+ /* Drop the domain from CSCHED_PRIV(ops)->sdom under the private lock. */
+ spin_lock_irqsave(&CSCHED_PRIV(ops)->lock, flags);
+ list_del_init(&sdom->sdom_elem);
+ spin_unlock_irqrestore(&CSCHED_PRIV(ops)->lock, flags);
+
+ xfree(data);
+}
+
+static void
+csched_dom_destroy(struct scheduler *ops, struct domain *dom)
{
struct csched_dom *sdom = CSCHED_DOM(dom);
- int flags;
BUG_ON(!list_empty(&sdom->vcpu));
- spin_lock_irqsave(&csched_priv.lock, flags);
-
- list_del_init(&sdom->sdom_elem);
-
- spin_unlock_irqrestore(&csched_priv.lock, flags);
-
- xfree(CSCHED_DOM(dom));
+ csched_free_domdata(ops, CSCHED_DOM(dom));
}
/* How long should we let this vcpu run for? */
static s_time_t
-csched_runtime(int cpu, struct csched_vcpu *snext)
+csched_runtime(struct scheduler *ops, int cpu, struct csched_vcpu *snext)
{
s_time_t time = CSCHED_MAX_TIMER;
- struct csched_runqueue_data *rqd = RQD(cpu);
+ struct csched_runqueue_data *rqd = RQD(ops, cpu);
struct list_head *runq = &rqd->runq;
if ( is_idle_vcpu(snext->vcpu) )
@@ -851,10 +897,10 @@ void __dump_execstate(void *unused);
* fast for the common case.
*/
static struct task_slice
-csched_schedule(s_time_t now)
+csched_schedule(struct scheduler *ops, s_time_t now)
{
const int cpu = smp_processor_id();
- struct csched_runqueue_data *rqd = RQD(cpu);
+ struct csched_runqueue_data *rqd = RQD(ops, cpu);
struct list_head * const runq = &rqd->runq;
struct csched_vcpu * const scurr = CSCHED_VCPU(current);
struct csched_vcpu *snext = NULL;
@@ -927,7 +973,7 @@ csched_schedule(s_time_t now)
}
if ( !is_idle_vcpu(snext->vcpu) && snext->credit <= CSCHED_CREDIT_RESET )
- reset_credit(cpu, now);
+ reset_credit(ops, cpu, now);
#if 0
/*
@@ -955,7 +1001,7 @@ csched_schedule(s_time_t now)
/*
* Return task to run next...
*/
- ret.time = csched_runtime(cpu, snext);
+ ret.time = csched_runtime(ops, cpu, snext);
ret.task = snext->vcpu;
CSCHED_VCPU_CHECK(ret.task);
@@ -977,7 +1023,7 @@ csched_dump_vcpu(struct csched_vcpu *svc
}
static void
-csched_dump_pcpu(int cpu)
+csched_dump_pcpu(struct scheduler *ops, int cpu)
{
struct list_head *runq, *iter;
struct csched_vcpu *svc;
@@ -986,7 +1032,7 @@ csched_dump_pcpu(int cpu)
/* FIXME: Do locking properly for access to runqueue structures */
- runq = &RQD(cpu)->runq;
+ runq = &RQD(ops, cpu)->runq;
cpumask_scnprintf(cpustr, sizeof(cpustr), per_cpu(cpu_sibling_map,cpu));
printk(" sibling=%s, ", cpustr);
@@ -1014,22 +1060,23 @@ csched_dump_pcpu(int cpu)
}
static void
-csched_dump(void)
+csched_dump(struct scheduler *ops)
{
struct list_head *iter_sdom, *iter_svc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
int loop;
printk("info:\n"
"\tncpus = %u\n"
"\tdefault-weight = %d\n",
- csched_priv.ncpus,
+ prv->ncpus,
CSCHED_DEFAULT_WEIGHT);
/* FIXME: Locking! */
printk("active vcpus:\n");
loop = 0;
- list_for_each( iter_sdom, &csched_priv.sdom )
+ list_for_each( iter_sdom, &prv->sdom )
{
struct csched_dom *sdom;
sdom = list_entry(iter_sdom, struct csched_dom, sdom_elem);
@@ -1046,42 +1093,49 @@ csched_dump(void)
}
static void
-make_runq_map(void)
+make_runq_map(struct scheduler *ops)
{
int cpu, cpu_count=0;
+ struct csched_private *prv = CSCHED_PRIV(ops);
/* FIXME: Read pcpu layout and do this properly */
for_each_possible_cpu( cpu )
{
- csched_priv.runq_map[cpu] = 0;
+ prv->runq_map[cpu] = 0;
cpu_count++;
}
- csched_priv.runq_count = 1;
+ prv->runq_count = 1;
/* Move to the init code...? */
- csched_priv.rqd[0].cpu_min = 0;
- csched_priv.rqd[0].cpu_max = cpu_count;
+ prv->rqd[0].cpu_min = 0;
+ prv->rqd[0].cpu_max = cpu_count;
}
-static void
-csched_init(void)
+static int
+csched_init(struct scheduler *ops, int pool0)
{
int i;
+ struct csched_private *prv;
printk("Initializing Credit2 scheduler\n" \
" WARNING: This is experimental software in development.\n" \
" Use at your own risk.\n");
- spin_lock_init(&csched_priv.lock);
- INIT_LIST_HEAD(&csched_priv.sdom);
+ prv = xmalloc(struct csched_private);
+ if ( prv == NULL )
+ return 1; /* allocation failed */
+ memset(prv, 0, sizeof(*prv));
+ ops->sched_data = prv; /* publish before make_runq_map(ops) reads CSCHED_PRIV(ops) */
- csched_priv.ncpus = 0;
+ spin_lock_init(&prv->lock);
+ INIT_LIST_HEAD(&prv->sdom);
- make_runq_map();
+ prv->ncpus = 0;
- for ( i=0; i<csched_priv.runq_count ; i++ )
+ make_runq_map(ops);
+ for ( i=0; i<prv->runq_count ; i++ )
{
- struct csched_runqueue_data *rqd = csched_priv.rqd + i;
+ struct csched_runqueue_data *rqd = prv->rqd + i;
rqd->max_weight = 1;
rqd->id = i;
@@ -1096,24 +1150,40 @@ csched_init(void)
spinlock_t *lock;
/* Point the per-cpu schedule lock to the runq_id lock */
- runq_id = csched_priv.runq_map[i];
+ runq_id = prv->runq_map[i];
lock = &per_cpu(schedule_data, runq_id)._lock;
per_cpu(schedule_data, i).schedule_lock = lock;
- csched_priv.ncpus++;
+ prv->ncpus++;
}
+
+ return 0;
}
+
+static void
+csched_deinit(struct scheduler *ops)
+{
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
+}
+
+
+static struct csched_private _csched_priv;
struct scheduler sched_credit2_def = {
.name = "SMP Credit Scheduler rev2",
.opt_name = "credit2",
.sched_id = XEN_SCHEDULER_CREDIT2,
+ .sched_data = &_csched_priv,
.init_domain = csched_dom_init,
.destroy_domain = csched_dom_destroy,
- .init_vcpu = csched_vcpu_init,
+ .insert_vcpu = csched_vcpu_insert,
.destroy_vcpu = csched_vcpu_destroy,
.sleep = csched_vcpu_sleep,
@@ -1128,4 +1198,9 @@ struct scheduler sched_credit2_def = {
.dump_cpu_state = csched_dump_pcpu,
.dump_settings = csched_dump,
.init = csched_init,
+ .deinit = csched_deinit,
+ .alloc_vdata = csched_alloc_vdata,
+ .free_vdata = csched_free_vdata,
+ .alloc_domdata = csched_alloc_domdata,
+ .free_domdata = csched_free_domdata,
};
diff -r fadf63ab49e7 xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/common/sched_sedf.c Tue Apr 20 11:10:40 2010 +0200
@@ -20,6 +20,9 @@
if ( (_f) <= SEDFLEVEL ) \
printk(_a ); \
} while ( 0 )
+
+#define SEDF_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? &cpupool_free_cpus : &(_pool)->cpu_valid)
#ifndef NDEBUG
#define SEDF_STATS
@@ -132,7 +135,7 @@ struct sedf_cpu_info {
#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
-static void sedf_dump_cpu_state(int i);
+static void sedf_dump_cpu_state(struct scheduler *ops, int i);
static inline int extraq_on(struct vcpu *d, int i)
{
@@ -329,30 +332,17 @@ static inline void __add_to_runqueue_sor
}
-static int sedf_init_vcpu(struct vcpu *v)
+static void *sedf_alloc_vdata(struct scheduler *ops, struct vcpu *v, void *dd)
{
struct sedf_vcpu_info *inf;
- if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
- return -1;
- memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
+ inf = xmalloc(struct sedf_vcpu_info);
+ if ( inf == NULL )
+ return NULL;
- inf = EDOM_INFO(v);
+ memset(inf, 0, sizeof(struct sedf_vcpu_info));
inf->vcpu = v;
-
- /* Allocate per-CPU context if this is the first domain to be added. */
- if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
- {
- per_cpu(schedule_data, v->processor).sched_priv =
- xmalloc(struct sedf_cpu_info);
- BUG_ON(per_cpu(schedule_data, v->processor).sched_priv == NULL);
- memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor)));
- INIT_LIST_HEAD(WAITQ(v->processor));
- INIT_LIST_HEAD(RUNQ(v->processor));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
- }
-
+
/* Every VCPU gets an equal share of extratime by default. */
inf->deadl_abs = 0;
inf->latency = 0;
@@ -383,39 +373,88 @@ static int sedf_init_vcpu(struct vcpu *v
}
else
{
- EDOM_INFO(v)->deadl_abs = 0;
- EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
+ inf->deadl_abs = 0;
+ inf->status &= ~SEDF_ASLEEP;
}
+
+ return inf;
+}
+
+static void *
+sedf_alloc_pdata(struct scheduler *ops, int cpu)
+{
+ struct sedf_cpu_info *spc;
+
+ spc = xmalloc(struct sedf_cpu_info);
+ BUG_ON(spc == NULL);
+ memset(spc, 0, sizeof(*spc));
+ INIT_LIST_HEAD(&spc->waitq);
+ INIT_LIST_HEAD(&spc->runnableq);
+ INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
+ INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
+
+ return (void *)spc;
+}
+
+static void
+sedf_free_pdata(struct scheduler *ops, void *spc, int cpu)
+{
+ if ( spc == NULL )
+ return;
+
+ xfree(spc);
+}
+
+static void sedf_free_vdata(struct scheduler *ops, void *priv)
+{
+ xfree(priv);
+}
+
+static void sedf_destroy_vcpu(struct scheduler *ops, struct vcpu *v)
+{
+ sedf_free_vdata(ops, v->sched_priv);
+}
+
+static void *
+sedf_alloc_domdata(struct scheduler *ops, struct domain *d)
+{
+ void *mem;
+
+ mem = xmalloc(struct sedf_dom_info);
+ if ( mem == NULL )
+ return NULL;
+
+ memset(mem, 0, sizeof(struct sedf_dom_info));
+
+ return mem;
+}
+
+static int sedf_init_domain(struct scheduler *ops, struct domain *d)
+{
+ d->sched_priv = sedf_alloc_domdata(ops, d);
+ if ( d->sched_priv == NULL )
+ return -ENOMEM;
return 0;
}
-static void sedf_destroy_vcpu(struct vcpu *v)
+static void sedf_free_domdata(struct scheduler *ops, void *data)
{
- xfree(v->sched_priv);
+ xfree(data);
}
-static int sedf_init_domain(struct domain *d)
+static void sedf_destroy_domain(struct scheduler *ops, struct domain *d)
{
- d->sched_priv = xmalloc(struct sedf_dom_info);
- if ( d->sched_priv == NULL )
- return -ENOMEM;
-
- memset(d->sched_priv, 0, sizeof(struct sedf_dom_info));
-
- return 0;
+ sedf_free_domdata(ops, d->sched_priv);
}
-static void sedf_destroy_domain(struct domain *d)
-{
- xfree(d->sched_priv);
-}
-
-static int sedf_pick_cpu(struct vcpu *v)
+static int sedf_pick_cpu(struct scheduler *ops, struct vcpu *v)
{
cpumask_t online_affinity;
+ cpumask_t *online;
- cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
+ online = SEDF_CPUONLINE(v->domain->cpupool);
+ cpus_and(online_affinity, v->cpu_affinity, *online);
return first_cpu(online_affinity);
}
@@ -751,7 +790,7 @@ static struct task_slice sedf_do_extra_s
-timeslice for the current period used up
-domain on waitqueue has started it's period
-and various others ;) in general: determine which domain to run next*/
-static struct task_slice sedf_do_schedule(s_time_t now)
+static struct task_slice sedf_do_schedule(struct scheduler *ops, s_time_t now)
{
int cpu = smp_processor_id();
struct list_head *runq = RUNQ(cpu);
@@ -786,6 +825,13 @@ static struct task_slice sedf_do_schedul
}
check_waitq:
update_queues(now, runq, waitq);
+
+ if ( unlikely(!cpu_isset(cpu, *SEDF_CPUONLINE(per_cpu(cpupool, cpu)))) )
+ {
+ ret.task = IDLETASK(cpu);
+ ret.time = SECONDS(1);
+ goto sched_done;
+ }
/*now simply pick the first domain from the runqueue, which has the
earliest deadline, because the list is sorted*/
@@ -824,6 +870,7 @@ static struct task_slice sedf_do_schedul
extraq, cpu);
}
+ sched_done:
/*TODO: Do something USEFUL when this happens and find out, why it
still can happen!!!*/
if ( ret.time < 0)
@@ -841,7 +888,7 @@ static struct task_slice sedf_do_schedul
}
-static void sedf_sleep(struct vcpu *d)
+static void sedf_sleep(struct scheduler *ops, struct vcpu *d)
{
PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
d->domain->domain_id, d->vcpu_id);
@@ -1060,7 +1107,7 @@ static inline int should_switch(struct v
return 1;
}
-static void sedf_wake(struct vcpu *d)
+static void sedf_wake(struct scheduler *ops, struct vcpu *d)
{
s_time_t now = NOW();
struct sedf_vcpu_info* inf = EDOM_INFO(d);
@@ -1213,8 +1260,8 @@ static void sedf_dump_domain(struct vcpu
}
-/* dumps all domains on hte specified cpu */
-static void sedf_dump_cpu_state(int i)
+/* dumps all domains on the specified cpu */
+static void sedf_dump_cpu_state(struct scheduler *ops, int i)
{
struct list_head *list, *queue, *tmp;
struct sedf_vcpu_info *d_inf;
@@ -1287,7 +1334,7 @@ static void sedf_dump_cpu_state(int i)
/* Adjusts periods and slices of the domains accordingly to their weights. */
-static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
+static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
{
struct vcpu *p;
struct domain *d;
@@ -1308,6 +1355,8 @@ static int sedf_adjust_weights(struct xe
rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
+ if ( c != d->cpupool )
+ continue;
for_each_vcpu( d, p )
{
if ( EDOM_INFO(p)->weight )
@@ -1359,7 +1408,7 @@ static int sedf_adjust_weights(struct xe
/* set or fetch domain scheduling parameters */
-static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
+static int sedf_adjust(struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
struct vcpu *v;
int rc;
@@ -1368,9 +1417,6 @@ static int sedf_adjust(struct domain *p,
"new slice %"PRIu64"\nlatency %"PRIu64" extra:%s\n",
p->domain_id, op->u.sedf.period, op->u.sedf.slice,
op->u.sedf.latency, (op->u.sedf.extratime)?"yes":"no");
-
- if ( !p->vcpu )
- return -EINVAL;
if ( op->cmd == XEN_DOMCTL_SCHEDOP_putinfo )
{
@@ -1421,7 +1467,7 @@ static int sedf_adjust(struct domain *p,
}
}
- rc = sedf_adjust_weights(op);
+ rc = sedf_adjust_weights(p->cpupool, op);
if ( rc )
return rc;
@@ -1449,7 +1495,7 @@ static int sedf_adjust(struct domain *p,
return 0;
}
-const struct scheduler sched_sedf_def = {
+struct scheduler sched_sedf_def = {
.name = "Simple EDF Scheduler",
.opt_name = "sedf",
.sched_id = XEN_SCHEDULER_SEDF,
@@ -1457,8 +1503,14 @@ const struct scheduler sched_sedf_def =
.init_domain = sedf_init_domain,
.destroy_domain = sedf_destroy_domain,
- .init_vcpu = sedf_init_vcpu,
.destroy_vcpu = sedf_destroy_vcpu,
+
+ .alloc_vdata = sedf_alloc_vdata,
+ .free_vdata = sedf_free_vdata,
+ .alloc_pdata = sedf_alloc_pdata,
+ .free_pdata = sedf_free_pdata,
+ .alloc_domdata = sedf_alloc_domdata,
+ .free_domdata = sedf_free_domdata,
.do_schedule = sedf_do_schedule,
.pick_cpu = sedf_pick_cpu,
diff -r fadf63ab49e7 xen/common/schedule.c
--- a/xen/common/schedule.c Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/common/schedule.c Tue Apr 20 11:10:40 2010 +0200
@@ -53,11 +53,12 @@ static void poll_timer_fn(void *data);
/* This is global for now so that private implementations can reach it */
DEFINE_PER_CPU(struct schedule_data, schedule_data);
+DEFINE_PER_CPU(struct scheduler *, scheduler);
extern const struct scheduler sched_sedf_def;
extern const struct scheduler sched_credit_def;
extern const struct scheduler sched_credit2_def;
-static const struct scheduler *__initdata schedulers[] = {
+static const struct scheduler *schedulers[] = {
&sched_sedf_def,
&sched_credit_def,
&sched_credit2_def,
@@ -66,9 +67,15 @@ static const struct scheduler *__initdat
static struct scheduler __read_mostly ops;
-#define SCHED_OP(fn, ...) \
- (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ ) \
- : (typeof(ops.fn(__VA_ARGS__)))0 )
+#define SCHED_OP(opsptr, fn, ...) \
+ (( (opsptr)->fn != NULL ) ? (opsptr)->fn(opsptr, ##__VA_ARGS__ ) \
+ : (typeof((opsptr)->fn(opsptr, ##__VA_ARGS__)))0 )
+
+#define DOM2OP(_d) (((_d)->cpupool == NULL) ? &ops : &((_d)->cpupool->sched))
+#define VCPU2OP(_v) (DOM2OP((_v)->domain))
+#define VCPU2ONLINE(_v) \
+ (((_v)->domain->cpupool == NULL) ? &cpu_online_map \
+ : &(_v)->domain->cpupool->cpu_valid)
static inline void trace_runstate_change(struct vcpu *v, int new_state)
{
@@ -209,7 +216,86 @@ int sched_init_vcpu(struct vcpu *v, unsi
TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
- return SCHED_OP(init_vcpu, v);
+ if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
+ {
+ per_cpu(schedule_data, v->processor).sched_priv =
+ SCHED_OP(DOM2OP(d), alloc_pdata, processor);
+ if ( per_cpu(schedule_data, v->processor).sched_priv == NULL )
+ return 1;
+ }
+
+ v->sched_priv = SCHED_OP(DOM2OP(d), alloc_vdata, v, d->sched_priv);
+ if ( v->sched_priv == NULL )
+ return 1;
+
+ if ( is_idle_domain(d) )
+ per_cpu(schedule_data, v->processor).sched_idlevpriv = v->sched_priv;
+
+ return 0;
+}
+
+int sched_move_domain(struct domain *d, struct cpupool *c)
+{
+ struct vcpu *v;
+ unsigned int new_p;
+ void **vcpu_priv;
+ void *domdata;
+
+ domdata = SCHED_OP(&(c->sched), alloc_domdata, d);
+ if ( domdata == NULL )
+ return -ENOMEM;
+
+ vcpu_priv = xmalloc_array(void *, d->max_vcpus);
+ if ( vcpu_priv == NULL )
+ {
+ SCHED_OP(&(c->sched), free_domdata, domdata);
+ return -ENOMEM;
+ }
+
+ memset(vcpu_priv, 0, d->max_vcpus * sizeof(void *));
+ for_each_vcpu ( d, v )
+ {
+ vcpu_priv[v->vcpu_id] = SCHED_OP(&(c->sched), alloc_vdata, v, domdata);
+ if ( vcpu_priv[v->vcpu_id] == NULL )
+ {
+ for_each_vcpu ( d, v )
+ {
+ if ( vcpu_priv[v->vcpu_id] != NULL )
+ xfree(vcpu_priv[v->vcpu_id]);
+ }
+ xfree(vcpu_priv);
+ SCHED_OP(&(c->sched), free_domdata, domdata);
+ return -ENOMEM;
+ }
+ }
+
+ domain_pause(d);
+
+ new_p = first_cpu(c->cpu_valid);
+ for_each_vcpu ( d, v )
+ {
+ migrate_timer(&v->periodic_timer, new_p);
+ migrate_timer(&v->singleshot_timer, new_p);
+ migrate_timer(&v->poll_timer, new_p);
+
+ SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
+
+ cpus_setall(v->cpu_affinity);
+ v->processor = new_p;
+ v->sched_priv = vcpu_priv[v->vcpu_id];
+
+ new_p = cycle_cpu(new_p, c->cpu_valid);
+ }
+
+ d->cpupool = c;
+ SCHED_OP(DOM2OP(d), free_domdata, d->sched_priv);
+ d->sched_priv = domdata;
+
+ domain_unpause(d);
+
+ xfree(vcpu_priv);
+
+ return 0;
}
void sched_destroy_vcpu(struct vcpu *v)
@@ -219,17 +305,17 @@ void sched_destroy_vcpu(struct vcpu *v)
kill_timer(&v->poll_timer);
if ( test_and_clear_bool(v->is_urgent) )
atomic_dec(&per_cpu(schedule_data, v->processor).urgent_count);
- SCHED_OP(destroy_vcpu, v);
+ SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
}
int sched_init_domain(struct domain *d)
{
- return SCHED_OP(init_domain, d);
+ return SCHED_OP(DOM2OP(d), init_domain, d);
}
void sched_destroy_domain(struct domain *d)
{
- SCHED_OP(destroy_domain, d);
+ SCHED_OP(DOM2OP(d), destroy_domain, d);
}
void vcpu_sleep_nosync(struct vcpu *v)
@@ -243,7 +329,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
if ( v->runstate.state == RUNSTATE_runnable )
vcpu_runstate_change(v, RUNSTATE_offline, NOW());
- SCHED_OP(sleep, v);
+ SCHED_OP(VCPU2OP(v), sleep, v);
}
vcpu_schedule_unlock_irqrestore(v, flags);
@@ -271,7 +357,7 @@ void vcpu_wake(struct vcpu *v)
{
if ( v->runstate.state >= RUNSTATE_blocked )
vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
- SCHED_OP(wake, v);
+ SCHED_OP(VCPU2OP(v), wake, v);
}
else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
{
@@ -326,7 +412,7 @@ static void vcpu_migrate(struct vcpu *v)
/* Select new CPU. */
old_cpu = v->processor;
- new_cpu = SCHED_OP(pick_cpu, v);
+ new_cpu = SCHED_OP(VCPU2OP(v), pick_cpu, v);
/*
* Transfer urgency status to new CPU before switching CPUs, as once
@@ -369,19 +455,29 @@ void vcpu_force_reschedule(struct vcpu *
}
/*
- * This function is used by cpu_hotplug code from stop_machine context.
- * Hence we can avoid needing to take certain locks.
+ * This function is used by cpu_hotplug code from stop_machine context
+ * and from cpupools to switch schedulers on a cpu.
*/
-void cpu_disable_scheduler(void)
+int cpu_disable_scheduler(unsigned int cpu, int lock)
{
struct domain *d;
struct vcpu *v;
- unsigned int cpu = smp_processor_id();
+ struct cpupool *c;
+ int ret = 0;
+
+ c = per_cpu(cpupool, cpu);
+ if ( c == NULL )
+ return ret;
for_each_domain ( d )
{
+ if ( d->cpupool != c )
+ continue;
+
for_each_vcpu ( d, v )
{
+ if ( lock != 0 )
+ vcpu_schedule_lock_irq(v);
if ( (cpus_weight(v->cpu_affinity) == 1) &&
cpu_isset(cpu, v->cpu_affinity) )
{
@@ -395,26 +491,46 @@ void cpu_disable_scheduler(void)
* be chosen when the timer is next re-set.
*/
if ( v->singleshot_timer.cpu == cpu )
- migrate_timer(&v->singleshot_timer, 0);
+ {
+ int cpu_mig;
+
+ cpu_mig = first_cpu(c->cpu_valid);
+ if (cpu_mig == cpu)
+ cpu_mig = next_cpu(cpu_mig, c->cpu_valid);
+ migrate_timer(&v->singleshot_timer, cpu_mig);
+ }
if ( v->processor == cpu )
{
set_bit(_VPF_migrating, &v->pause_flags);
+ if ( lock != 0 )
+ vcpu_schedule_unlock_irq(v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
+ else if ( lock != 0 )
+ vcpu_schedule_unlock_irq(v);
+ /*
+ * A vcpu active in the hypervisor will not be migratable.
+ * The caller should try again after releasing and reacquiring
+ * all locks.
+ */
+ if ( v->processor == cpu )
+ ret = -EAGAIN;
}
}
+ return ret;
}
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
cpumask_t online_affinity, old_affinity;
+ cpumask_t *online;
if ( v->domain->is_pinned )
return -EINVAL;
-
- cpus_and(online_affinity, *affinity, cpu_online_map);
+ online = VCPU2ONLINE(v);
+ cpus_and(online_affinity, *affinity, *online);
if ( cpus_empty(online_affinity) )
return -EINVAL;
@@ -723,7 +839,7 @@ long sched_adjust(struct domain *d, stru
struct vcpu *v;
long ret;
- if ( (op->sched_id != ops.sched_id) ||
+ if ( (op->sched_id != DOM2OP(d)->sched_id) ||
((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
(op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
return -EINVAL;
@@ -750,7 +866,7 @@ long sched_adjust(struct domain *d, stru
if ( d == current->domain )
vcpu_schedule_lock_irq(current);
- if ( (ret = SCHED_OP(adjust, d, op)) == 0 )
+ if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
if ( d == current->domain )
@@ -797,6 +913,7 @@ static void schedule(void)
{
struct vcpu *prev = current, *next = NULL;
s_time_t now = NOW();
+ struct scheduler *sched = this_cpu(scheduler);
struct schedule_data *sd;
struct task_slice next_slice;
@@ -812,7 +929,7 @@ static void schedule(void)
stop_timer(&sd->s_timer);
/* get policy-specific decision on scheduling... */
- next_slice = ops.do_schedule(now);
+ next_slice = sched->do_schedule(sched, now);
next = next_slice.task;
@@ -871,6 +988,10 @@ static void schedule(void)
update_vcpu_system_time(next);
vcpu_periodic_timer_work(next);
+ TRACE_4D(TRC_SCHED_SWITCH,
+ prev->domain->domain_id, prev->vcpu_id,
+ next->domain->domain_id, next->vcpu_id);
+
context_switch(prev, next);
}
@@ -884,7 +1005,7 @@ void context_saved(struct vcpu *prev)
/* Check for migration request /after/ clearing running flag. */
smp_mb();
- SCHED_OP(context_saved, prev);
+ SCHED_OP(VCPU2OP(prev), context_saved, prev);
if ( unlikely(test_bit(_VPF_migrating, &prev->pause_flags)) )
vcpu_migrate(prev);
@@ -920,20 +1041,25 @@ static void poll_timer_fn(void *data)
vcpu_unblock(v);
}
+/* Get scheduler by id */
+const struct scheduler *scheduler_get_by_id(unsigned int id)
+{
+ int i;
+
+ for ( i = 0; schedulers[i] != NULL; i++ )
+ {
+ if ( schedulers[i]->sched_id == id )
+ return schedulers[i];
+ }
+ return NULL;
+}
+
/* Initialise the data structures. */
void __init scheduler_init(void)
{
int i;
open_softirq(SCHEDULE_SOFTIRQ, schedule);
-
- for_each_possible_cpu ( i )
- {
- spin_lock_init(&per_cpu(schedule_data, i)._lock);
- per_cpu(schedule_data, i).schedule_lock
- = &per_cpu(schedule_data, i)._lock;
- init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
- }
for ( i = 0; schedulers[i] != NULL; i++ )
{
@@ -948,43 +1074,125 @@ void __init scheduler_init(void)
ops = *schedulers[0];
}
+ for_each_possible_cpu ( i )
+ {
+ per_cpu(scheduler, i) = &ops;
+ spin_lock_init(&per_cpu(schedule_data, i)._lock);
+ per_cpu(schedule_data, i).schedule_lock
+ = &per_cpu(schedule_data, i)._lock;
+ init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
+ }
+
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(init);
+ if ( SCHED_OP(&ops, init, 1) )
+ panic("scheduler returned error on init\n");
}
-void dump_runq(unsigned char key)
+/* switch scheduler on cpu */
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
{
- s_time_t now = NOW();
- int i;
unsigned long flags;
+ struct vcpu *v;
+ void *vpriv = NULL;
+ void *ppriv;
+ void *ppriv_old;
+ struct scheduler *old_ops;
+ struct scheduler *new_ops;
- local_irq_save(flags);
+ old_ops = per_cpu(scheduler, cpu);
+ new_ops = (c == NULL) ? &ops : &(c->sched);
+ v = per_cpu(schedule_data, cpu).idle;
+ ppriv = SCHED_OP(new_ops, alloc_pdata, cpu);
+ if ( c != NULL )
+ vpriv = SCHED_OP(new_ops, alloc_vdata, v, v->domain->sched_priv);
- printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(dump_settings);
- printk("sched_smt_power_savings: %s\n",
- sched_smt_power_savings? "enabled":"disabled");
- printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+ spin_lock_irqsave(per_cpu(schedule_data, cpu).schedule_lock, flags);
- for_each_online_cpu ( i )
+ if ( c == NULL )
+ {
+ vpriv = v->sched_priv;
+ v->sched_priv = per_cpu(schedule_data, cpu).sched_idlevpriv;
+ }
+ else
+ {
+ v->sched_priv = vpriv;
+ vpriv = NULL;
+ }
+ SCHED_OP(old_ops, tick_suspend, cpu);
+ per_cpu(scheduler, cpu) = new_ops;
+ ppriv_old = per_cpu(schedule_data, cpu).sched_priv;
+ per_cpu(schedule_data, cpu).sched_priv = ppriv;
+ SCHED_OP(new_ops, tick_resume, cpu);
+ SCHED_OP(new_ops, insert_vcpu, v);
+
+ spin_unlock_irqrestore(per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+ if ( vpriv != NULL )
+ SCHED_OP(old_ops, free_vdata, vpriv);
+ SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
+}
+
+/* init scheduler global data */
+int schedule_init_global(char *name, struct scheduler *sched)
+{
+ int i;
+ const struct scheduler *data;
+
+ data = &ops;
+ for ( i = 0; (schedulers[i] != NULL) && (name != NULL) ; i++ )
+ {
+ if ( strcmp(schedulers[i]->opt_name, name) == 0 )
+ {
+ data = schedulers[i];
+ break;
+ }
+ }
+ memcpy(sched, data, sizeof(*sched));
+ return SCHED_OP(sched, init, 0);
+}
+
+/* deinitialize scheduler global data */
+void schedule_deinit_global(struct scheduler *sched)
+{
+ SCHED_OP(sched, deinit);
+}
+
+void schedule_dump(struct cpupool *c)
+{
+ int i;
+ struct scheduler *sched;
+ cpumask_t *cpus;
+
+ sched = (c == NULL) ? &ops : &(c->sched);
+ cpus = (c == NULL) ? &cpupool_free_cpus : &c->cpu_valid;
+ printk("Scheduler: %s (%s)\n", sched->name, sched->opt_name);
+ SCHED_OP(sched, dump_settings);
+
+ for_each_cpu_mask (i, *cpus)
{
spin_lock(per_cpu(schedule_data, i).schedule_lock);
printk("CPU[%02d] ", i);
- SCHED_OP(dump_cpu_state, i);
+ SCHED_OP(sched, dump_cpu_state, i);
spin_unlock(per_cpu(schedule_data, i).schedule_lock);
}
-
- local_irq_restore(flags);
}
void sched_tick_suspend(void)
{
- SCHED_OP(tick_suspend);
+ struct scheduler *sched;
+ unsigned int cpu = smp_processor_id();
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_suspend, cpu);
}
void sched_tick_resume(void)
{
- SCHED_OP(tick_resume);
+ struct scheduler *sched;
+ unsigned int cpu = smp_processor_id();
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_resume, cpu);
}
#ifdef CONFIG_COMPAT
diff -r fadf63ab49e7 xen/include/asm-x86/smp.h
--- a/xen/include/asm-x86/smp.h Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/include/asm-x86/smp.h Tue Apr 20 11:10:40 2010 +0200
@@ -56,7 +56,6 @@ extern u32 cpu_2_logical_apicid[];
#define CPU_ONLINE 0x0002 /* CPU is up */
#define CPU_DEAD 0x0004 /* CPU is dead */
DECLARE_PER_CPU(int, cpu_state);
-extern spinlock_t(cpu_add_remove_lock);
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
extern int cpu_down(unsigned int cpu);
diff -r fadf63ab49e7 xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/include/public/domctl.h Tue Apr 20 11:10:40 2010 +0200
@@ -35,7 +35,7 @@
#include "xen.h"
#include "grant_table.h"
-#define XEN_DOMCTL_INTERFACE_VERSION 0x00000006
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000007
struct xenctl_cpumap {
XEN_GUEST_HANDLE_64(uint8) bitmap;
@@ -60,10 +60,14 @@ struct xen_domctl_createdomain {
/* Should domain memory integrity be verifed by tboot during Sx? */
#define _XEN_DOMCTL_CDF_s3_integrity 2
#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
- uint32_t flags;
/* Disable out-of-sync shadow page tables? */
#define _XEN_DOMCTL_CDF_oos_off 3
#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off)
+ /* cpupool is specified (0 otherwise) */
+#define _XEN_DOMCTL_CDF_pool 4
+#define XEN_DOMCTL_CDF_pool (1U<<_XEN_DOMCTL_CDF_pool)
+ uint32_t flags;
+ uint32_t cpupool;
};
typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
@@ -106,6 +110,7 @@ struct xen_domctl_getdomaininfo {
uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
uint32_t ssidref;
xen_domain_handle_t handle;
+ uint32_t cpupool;
};
typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
@@ -785,6 +790,30 @@ typedef struct xen_domctl_mem_sharing_op
typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t);
+/*
+ * cpupool operations
+ */
+/* XEN_DOMCTL_cpupool_op */
+#define XEN_DOMCTL_CPUPOOL_OP_CREATE 1 /* C */
+#define XEN_DOMCTL_CPUPOOL_OP_DESTROY 2 /* D */
+#define XEN_DOMCTL_CPUPOOL_OP_INFO 3 /* I */
+#define XEN_DOMCTL_CPUPOOL_OP_ADDCPU 4 /* A */
+#define XEN_DOMCTL_CPUPOOL_OP_RMCPU 5 /* R */
+#define XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */
+#define XEN_DOMCTL_CPUPOOL_OP_FREEINFO 7 /* F */
+#define XEN_DOMCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF
+struct xen_domctl_cpupool_op {
+ uint32_t op; /* IN */
+ uint32_t cpupool_id; /* IN: CDIARM OUT: CI */
+ uint32_t sched_id; /* IN: C OUT: I */
+ uint32_t domid; /* IN: M */
+ uint32_t cpu; /* IN: AR */
+ uint32_t n_dom; /* OUT: I */
+ struct xenctl_cpumap cpumap; /* OUT: IF */
+};
+typedef struct xen_domctl_cpupool_op xen_domctl_cpupool_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpupool_op_t);
+
struct xen_domctl {
uint32_t cmd;
@@ -846,6 +875,7 @@ struct xen_domctl {
#define XEN_DOMCTL_gettscinfo 59
#define XEN_DOMCTL_settscinfo 60
#define XEN_DOMCTL_getpageframeinfo3 61
+#define XEN_DOMCTL_cpupool_op 62
#define XEN_DOMCTL_gdbsx_guestmemio 1000
#define XEN_DOMCTL_gdbsx_pausevcpu 1001
#define XEN_DOMCTL_gdbsx_unpausevcpu 1002
@@ -894,6 +924,7 @@ struct xen_domctl {
struct xen_domctl_debug_op debug_op;
struct xen_domctl_mem_event_op mem_event_op;
struct xen_domctl_mem_sharing_op mem_sharing_op;
+ struct xen_domctl_cpupool_op cpupool_op;
#if defined(__i386__) || defined(__x86_64__)
struct xen_domctl_cpuid cpuid;
#endif
diff -r fadf63ab49e7 xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/include/xen/sched-if.h Tue Apr 20 11:10:40 2010 +0200
@@ -9,6 +9,15 @@
#define __XEN_SCHED_IF_H__
#include <xen/percpu.h>
+
+/* A global pointer to the initial cpupool (POOL0). */
+extern struct cpupool *cpupool0;
+
+/* cpus currently in no cpupool */
+extern cpumask_t cpupool_free_cpus;
+
+/* cpupool lock (used for cpu on/offline, too) */
+extern spinlock_t cpupool_lock;
/*
* In order to allow a scheduler to remap the lock->cpu mapping,
@@ -26,11 +35,14 @@ struct schedule_data {
struct vcpu *curr; /* current task */
struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
+ void *sched_idlevpriv; /* default scheduler vcpu data */
struct timer s_timer; /* scheduling timer */
atomic_t urgent_count; /* how many urgent vcpus */
} __cacheline_aligned;
DECLARE_PER_CPU(struct schedule_data, schedule_data);
+DECLARE_PER_CPU(struct scheduler *, scheduler);
+DECLARE_PER_CPU(struct cpupool *, cpupool);
static inline void vcpu_schedule_lock(struct vcpu *v)
{
@@ -78,29 +90,50 @@ struct scheduler {
char *name; /* full name for this scheduler */
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
+ void *sched_data; /* global data pointer */
- void (*init) (void);
+ int (*init) (struct scheduler *, int);
+ void (*deinit) (struct scheduler *);
- int (*init_domain) (struct domain *);
- void (*destroy_domain) (struct domain *);
+ void (*free_vdata) (struct scheduler *, void *);
+ void * (*alloc_vdata) (struct scheduler *, struct vcpu *,
+ void *);
+ void (*free_pdata) (struct scheduler *, void *, int);
+ void * (*alloc_pdata) (struct scheduler *, int);
+ void (*free_domdata) (struct scheduler *, void *);
+ void * (*alloc_domdata) (struct scheduler *, struct domain *);
- int (*init_vcpu) (struct vcpu *);
- void (*destroy_vcpu) (struct vcpu *);
+ int (*init_domain) (struct scheduler *, struct domain *);
+ void (*destroy_domain) (struct scheduler *, struct domain *);
- void (*sleep) (struct vcpu *);
- void (*wake) (struct vcpu *);
- void (*context_saved) (struct vcpu *);
+ void (*insert_vcpu) (struct scheduler *, struct vcpu *);
+ void (*destroy_vcpu) (struct scheduler *, struct vcpu *);
- struct task_slice (*do_schedule) (s_time_t);
+ void (*sleep) (struct scheduler *, struct vcpu *);
+ void (*wake) (struct scheduler *, struct vcpu *);
+ void (*context_saved) (struct scheduler *, struct vcpu *);
- int (*pick_cpu) (struct vcpu *);
- int (*adjust) (struct domain *,
+ struct task_slice (*do_schedule) (struct scheduler *, s_time_t);
+
+ int (*pick_cpu) (struct scheduler *, struct vcpu *);
+ int (*adjust) (struct scheduler *, struct domain *,
struct xen_domctl_scheduler_op *);
- void (*dump_settings) (void);
- void (*dump_cpu_state) (int);
+ void (*dump_settings) (struct scheduler *);
+ void (*dump_cpu_state) (struct scheduler *, int);
- void (*tick_suspend) (void);
- void (*tick_resume) (void);
+ void (*tick_suspend) (struct scheduler *, unsigned int);
+ void (*tick_resume) (struct scheduler *, unsigned int);
};
+struct cpupool
+{
+ int cpupool_id;
+ cpumask_t cpu_valid; /* all cpus assigned to pool */
+ struct cpupool *next;
+ unsigned int n_dom;
+ struct scheduler sched;
+};
+
+const struct scheduler *scheduler_get_by_id(unsigned int id);
+
#endif /* __XEN_SCHED_IF_H__ */
diff -r fadf63ab49e7 xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Mon Apr 19 17:57:28 2010 +0100
+++ b/xen/include/xen/sched.h Tue Apr 20 11:10:40 2010 +0200
@@ -213,6 +213,7 @@ struct domain
/* Scheduling. */
void *sched_priv; /* scheduler-specific data */
+ struct cpupool *cpupool;
struct domain *next_in_list;
struct domain *next_in_hashbucket;
@@ -377,7 +378,7 @@ static inline void get_knownalive_domain
}
struct domain *domain_create(
- domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
+ domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm 0
#define DOMCRF_hvm (1U<<_DOMCRF_hvm)
@@ -465,6 +466,7 @@ void sched_destroy_vcpu(struct vcpu *v);
void sched_destroy_vcpu(struct vcpu *v);
int sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
+int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int sched_id(void);
void sched_tick_suspend(void);
@@ -575,8 +577,13 @@ void domain_unpause_by_systemcontroller(
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);
+struct scheduler;
+
+int schedule_init_global(char *name, struct scheduler *sched);
+void schedule_deinit_global(struct scheduler *sched);
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
-void cpu_disable_scheduler(void);
+int cpu_disable_scheduler(unsigned int cpu, int lock);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
@@ -607,6 +614,18 @@ extern enum cpufreq_controller {
FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
+#define CPUPOOLID_NONE -1
+
+struct cpupool *cpupool_create(int poolid, char *sched);
+int cpupool_destroy(struct cpupool *c);
+int cpupool0_cpu_assign(struct cpupool *c);
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu);
+void cpupool_cpu_add(unsigned int cpu);
+int cpupool_add_domain(struct domain *d, int poolid);
+void cpupool_rm_domain(struct domain *d);
+int cpupool_do_domctl(struct xen_domctl_cpupool_op *op);
+#define num_cpupool_cpus(c) (cpus_weight((c)->cpu_valid))
+
#endif /* __SCHED_H__ */
/*
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|