Signed-off-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>
--
Juergen Gross Principal Developer Operating Systems
TSP ES&S SWE OS6 Telephone: +49 (0) 89 636 47950
Fujitsu Technology Solutions e-mail: juergen.gross@xxxxxxxxxxxxxx
Otto-Hahn-Ring 6 Internet: ts.fujitsu.com
D-81739 Muenchen Company details: ts.fujitsu.com/imprint.html
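
For reviewers, here is a minimal usage sketch (not part of the patch) of the new cpupool domctl interface added below: it creates a pool that uses the credit scheduler and then assigns one CPU to it. The do_domctl() issuing helper and the include path are assumptions made purely for illustration; only the sub-op constants and the xen_domctl_cpupool_op fields are taken from the hunk in xen/include/public/domctl.h.

#include <string.h>
#include <xen/domctl.h>                 /* assumed include path for the public domctl header */

int do_domctl(struct xen_domctl *op);   /* hypothetical helper that issues the hypercall */

/* Sketch: create cpupool 1 using the credit scheduler, then add CPU 2 to it. */
static int example_cpupool_setup(void)
{
    struct xen_domctl op;
    int rc;

    memset(&op, 0, sizeof(op));
    op.cmd = XEN_DOMCTL_cpupool_op;
    op.interface_version = XEN_DOMCTL_INTERFACE_VERSION;
    op.u.cpupool_op.op = XEN_DOMCTL_CPUPOOL_OP_CREATE;
    op.u.cpupool_op.cpupool_id = 1;
    op.u.cpupool_op.sched_id = XEN_SCHEDULER_CREDIT;
    rc = do_domctl(&op);
    if ( rc )
        return rc;

    op.u.cpupool_op.op = XEN_DOMCTL_CPUPOOL_OP_ADDCPU;
    op.u.cpupool_op.cpu = 2;
    return do_domctl(&op);
}
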
diff -r 655dc3bc1d8e xen/arch/x86/acpi/cpu_idle.c
--- a/xen/arch/x86/acpi/cpu_idle.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/acpi/cpu_idle.c Thu Apr 16 15:04:13 2009 +0200
@@ -198,7 +198,7 @@ static void acpi_processor_idle(void)
cpufreq_dbs_timer_suspend();
- sched_tick_suspend();
+ sched_tick_suspend(smp_processor_id());
/*
* sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
* which will break the later assumption of no sofirq pending,
@@ -216,7 +216,7 @@ static void acpi_processor_idle(void)
if ( softirq_pending(smp_processor_id()) )
{
local_irq_enable();
- sched_tick_resume();
+ sched_tick_resume(smp_processor_id());
cpufreq_dbs_timer_resume();
return;
}
@@ -237,7 +237,7 @@ static void acpi_processor_idle(void)
pm_idle_save();
else
acpi_safe_halt();
- sched_tick_resume();
+ sched_tick_resume(smp_processor_id());
cpufreq_dbs_timer_resume();
return;
}
@@ -345,7 +345,7 @@ static void acpi_processor_idle(void)
default:
local_irq_enable();
- sched_tick_resume();
+ sched_tick_resume(smp_processor_id());
cpufreq_dbs_timer_resume();
return;
}
@@ -357,7 +357,7 @@ static void acpi_processor_idle(void)
cx->time += sleep_ticks;
}
- sched_tick_resume();
+ sched_tick_resume(smp_processor_id());
cpufreq_dbs_timer_resume();
if ( cpuidle_current_governor->reflect )
diff -r 655dc3bc1d8e xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/domain.c Thu Apr 09 11:58:17 2009 +0200
@@ -1412,7 +1412,13 @@ struct migrate_info {
void (*saved_schedule_tail)(struct vcpu *);
cpumask_t saved_affinity;
unsigned int nest;
+ int borrowed;
};
+
+long continue_hypercall_on_cpu_dummy(void *data)
+{
+ return 0;
+}
static void continue_hypercall_on_cpu_helper(struct vcpu *v)
{
@@ -1420,8 +1426,16 @@ static void continue_hypercall_on_cpu_he
struct migrate_info *info = v->arch.continue_info;
cpumask_t mask = info->saved_affinity;
void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
+ int cpu = -1;
regs->eax = info->func(info->data);
+
+ if ( (info->nest == 0) && info->borrowed &&
+ ((cpu = cpupool_return_cpu(v->domain->cpupool)) >= 0) )
+ {
+ continue_hypercall_on_cpu(cpu, continue_hypercall_on_cpu_dummy,
+ info->data);
+ }
if ( info->nest-- == 0 )
{
@@ -1440,27 +1454,32 @@ int continue_hypercall_on_cpu(int cpu, l
struct migrate_info *info;
cpumask_t mask = cpumask_of_cpu(cpu);
int rc;
+ int borrowed = 0;
if ( cpu == smp_processor_id() )
return func(data);
+ borrowed = cpupool_borrow_cpu(v->domain->cpupool, cpu);
+
info = v->arch.continue_info;
if ( info == NULL )
{
info = xmalloc(struct migrate_info);
+ rc = -ENOMEM;
if ( info == NULL )
- return -ENOMEM;
+ goto out;
rc = vcpu_lock_affinity(v, &mask);
if ( rc )
{
xfree(info);
- return rc;
+ goto out;
}
info->saved_schedule_tail = v->arch.schedule_tail;
info->saved_affinity = mask;
info->nest = 0;
+ info->borrowed = 0;
v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
v->arch.continue_info = info;
@@ -1470,16 +1489,22 @@ int continue_hypercall_on_cpu(int cpu, l
BUG_ON(info->nest != 0);
rc = vcpu_locked_change_affinity(v, &mask);
if ( rc )
- return rc;
+ goto out;
info->nest++;
}
+ info->borrowed += borrowed;
info->func = func;
info->data = data;
/* Dummy return value will be overwritten by new schedule_tail. */
BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
return 0;
+
+out:
+ if ( borrowed )
+ cpupool_return_cpu(v->domain->cpupool);
+ return rc;
}
#define next_arg(fmt, args) ({ \
diff -r 655dc3bc1d8e xen/arch/x86/domain_build.c
--- a/xen/arch/x86/domain_build.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/domain_build.c Thu Apr 09 11:58:46 2009 +0200
@@ -9,6 +9,7 @@
#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/event.h>
@@ -706,13 +707,13 @@ int __init construct_dom0(
shared_info(d, vcpu_info[i].evtchn_upcall_mask) = 1;
if ( opt_dom0_max_vcpus == 0 )
- opt_dom0_max_vcpus = num_online_cpus();
+ opt_dom0_max_vcpus = num_cpupool_cpus(cpupool0);
if ( opt_dom0_max_vcpus > MAX_VIRT_CPUS )
opt_dom0_max_vcpus = MAX_VIRT_CPUS;
printk("Dom0 has maximum %u VCPUs\n", opt_dom0_max_vcpus);
for ( i = 1; i < opt_dom0_max_vcpus; i++ )
- (void)alloc_vcpu(d, i, i % num_online_cpus());
+ (void)alloc_vcpu(d, i, i % num_cpupool_cpus(cpupool0));
/* Set up CR3 value for write_ptbase */
if ( paging_mode_enabled(d) )
diff -r 655dc3bc1d8e xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/mm.c Thu Apr 09 12:00:02 2009 +0200
@@ -212,7 +212,7 @@ void __init arch_init_memory(void)
* Any Xen-heap pages that we will allow to be mapped will have
* their domain field set to dom_xen.
*/
- dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0);
+ dom_xen = domain_create(DOMID_XEN, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_xen == NULL);
/*
@@ -220,7 +220,7 @@ void __init arch_init_memory(void)
* This domain owns I/O pages that are within the range of the page_info
* array. Mappings occur at the priv of the caller.
*/
- dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0);
+ dom_io = domain_create(DOMID_IO, CPUPOOLID_NONE, DOMCRF_dummy, 0);
BUG_ON(dom_io == NULL);
/* First 1MB of RAM is historically marked as I/O. */
diff -r 655dc3bc1d8e xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/setup.c Thu Apr 16 08:20:11 2009 +0200
@@ -2,6 +2,7 @@
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/serial.h>
#include <xen/softirq.h>
@@ -232,7 +233,7 @@ static void __init init_idle_domain(void
/* Domain creation requires that scheduler structures are initialised. */
scheduler_init();
- idle_domain = domain_create(IDLE_DOMAIN_ID, 0, 0);
+ idle_domain = domain_create(IDLE_DOMAIN_ID, CPUPOOLID_NONE, 0, 0);
if ( (idle_domain == NULL) || (alloc_vcpu(idle_domain, 0, 0) == NULL) )
BUG();
@@ -995,8 +996,12 @@ void __init __start_xen(unsigned long mb
if ( !tboot_protect_mem_regions() )
panic("Could not protect TXT memory regions\n");
+ /* Create initial cpupool 0. */
+ cpupool0 = cpupool_create(0, NULL);
+ if ( (cpupool0 == NULL) || cpupool0_cpu_assign(cpupool0) )
+ panic("Error creating cpupool 0\n");
/* Create initial domain 0. */
- dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
+ dom0 = domain_create(0, 0, DOMCRF_s3_integrity, DOM0_SSIDREF);
if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
panic("Error creating domain 0\n");
diff -r 655dc3bc1d8e xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/arch/x86/smpboot.c Thu Apr 09 12:04:14 2009 +0200
@@ -1265,7 +1265,7 @@ int __cpu_disable(void)
/* It's now safe to remove this processor from the online map */
cpu_clear(cpu, cpu_online_map);
- cpu_disable_scheduler();
+ cpu_disable_scheduler(cpu, 0);
return 0;
}
@@ -1299,7 +1299,7 @@ int cpu_down(unsigned int cpu)
int err = 0;
spin_lock(&cpu_add_remove_lock);
- if (num_online_cpus() == 1) {
+ if (cpupool_cpu_remove(cpu)) {
err = -EBUSY;
goto out;
}
@@ -1451,6 +1451,7 @@ int __devinit __cpu_up(unsigned int cpu)
process_pending_timers();
}
+ cpupool_cpu_add(cpu);
cpufreq_add_cpu(cpu);
return 0;
}
diff -r 655dc3bc1d8e xen/common/Makefile
--- a/xen/common/Makefile Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/Makefile Thu Apr 09 12:04:41 2009 +0200
@@ -1,4 +1,5 @@ obj-y += bitmap.o
obj-y += bitmap.o
+obj-y += cpupool.o
obj-y += domctl.o
obj-y += domain.o
obj-y += event_channel.o
diff -r 655dc3bc1d8e xen/common/domain.c
--- a/xen/common/domain.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/domain.c Thu Apr 09 13:45:33 2009 +0200
@@ -187,7 +187,7 @@ struct vcpu *alloc_idle_vcpu(unsigned in
return v;
d = (vcpu_id == 0) ?
- domain_create(IDLE_DOMAIN_ID, 0, 0) :
+ domain_create(IDLE_DOMAIN_ID, CPUPOOLID_NONE, 0, 0) :
idle_vcpu[cpu_id - vcpu_id]->domain;
BUG_ON(d == NULL);
@@ -198,7 +198,7 @@ struct vcpu *alloc_idle_vcpu(unsigned in
}
struct domain *domain_create(
- domid_t domid, unsigned int domcr_flags, ssidref_t ssidref)
+ domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref)
{
struct domain *d, **pd;
enum { INIT_xsm = 1u<<0, INIT_rangeset = 1u<<1, INIT_evtchn = 1u<<2,
@@ -259,6 +259,9 @@ struct domain *domain_create(
d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
d->irq_caps = rangeset_new(d, "Interrupts", 0);
if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
+ goto fail;
+
+ if ( cpupool_add_domain(d, poolid) != 0 )
goto fail;
if ( sched_init_domain(d) != 0 )
@@ -564,6 +567,8 @@ static void complete_domain_destroy(stru
sched_destroy_domain(d);
+ cpupool_rm_domain(d);
+
/* Free page used by xen oprofile buffer. */
free_xenoprof_pages(d);
diff -r 655dc3bc1d8e xen/common/domctl.c
--- a/xen/common/domctl.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/domctl.c Thu Apr 16 08:20:11 2009 +0200
@@ -11,6 +11,7 @@
#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
+#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/domain_page.h>
@@ -138,15 +139,18 @@ void getdomaininfo(struct domain *d, str
info->max_pages = d->max_pages;
info->shared_info_frame = mfn_to_gmfn(d, __pa(d->shared_info)>>PAGE_SHIFT);
+ info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
+
memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
}
-static unsigned int default_vcpu0_location(void)
+static unsigned int default_vcpu0_location(struct domain *dom)
{
struct domain *d;
struct vcpu *v;
unsigned int i, cpu, nr_cpus, *cnt;
cpumask_t cpu_exclude_map;
+ cpumask_t online;
/* Do an initial CPU placement. Pick the least-populated CPU. */
nr_cpus = last_cpu(cpu_possible_map) + 1;
@@ -171,7 +175,8 @@ static unsigned int default_vcpu0_locati
if ( cpus_weight(cpu_sibling_map[0]) > 1 )
cpu = next_cpu(cpu, cpu_sibling_map[0]);
cpu_exclude_map = cpu_sibling_map[0];
- for_each_online_cpu ( i )
+ online = (dom->cpupool == NULL) ? cpu_online_map : dom->cpupool->cpu_valid;
+ for_each_cpu_mask(i, online)
{
if ( cpu_isset(i, cpu_exclude_map) )
continue;
@@ -366,12 +371,13 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
domid_t dom;
static domid_t rover = 0;
unsigned int domcr_flags;
+ int pool = 0;
ret = -EINVAL;
if ( supervisor_mode_kernel ||
(op->u.createdomain.flags &
~(XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap |
- XEN_DOMCTL_CDF_s3_integrity)) )
+ XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_pool)) )
break;
dom = op->domain;
@@ -405,9 +411,11 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
domcr_flags |= DOMCRF_hap;
if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
domcr_flags |= DOMCRF_s3_integrity;
+ if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_pool )
+ pool = op->u.createdomain.cpupool;
ret = -ENOMEM;
- d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref);
+ d = domain_create(dom, pool, domcr_flags, op->u.createdomain.ssidref);
if ( d == NULL )
break;
@@ -426,6 +434,7 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
{
struct domain *d;
unsigned int i, max = op->u.max_vcpus.max, cpu;
+ cpumask_t online;
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
@@ -455,14 +464,15 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
goto maxvcpu_out;
ret = -ENOMEM;
+ online = (d->cpupool == NULL) ? cpu_online_map : d->cpupool->cpu_valid;
for ( i = 0; i < max; i++ )
{
if ( d->vcpu[i] != NULL )
continue;
cpu = (i == 0) ?
- default_vcpu0_location() :
- cycle_cpu(d->vcpu[i-1]->processor, cpu_online_map);
+ default_vcpu0_location(d) :
+ cycle_cpu(d->vcpu[i-1]->processor, online);
if ( alloc_vcpu(d, i, cpu) == NULL )
goto maxvcpu_out;
@@ -890,6 +900,14 @@ long do_domctl(XEN_GUEST_HANDLE(xen_domc
}
break;
+ case XEN_DOMCTL_cpupool_op:
+ {
+ ret = cpupool_do_domctl(op);
+ if ( (ret == 0) && copy_to_guest(u_domctl, op, 1) )
+ ret = -EFAULT;
+ }
+ break;
+
default:
ret = arch_do_domctl(op, u_domctl);
break;
diff -r 655dc3bc1d8e xen/common/sched_credit.c
--- a/xen/common/sched_credit.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/sched_credit.c Thu Apr 16 09:41:15 2009 +0200
@@ -69,11 +69,15 @@
/*
* Useful macros
*/
+#define CSCHED_PRIV(_ops) \
+ ((struct csched_private *)((_ops)->sched_data))
#define CSCHED_PCPU(_c) \
((struct csched_pcpu *)per_cpu(schedule_data, _c).sched_priv)
#define CSCHED_VCPU(_vcpu) ((struct csched_vcpu *) (_vcpu)->sched_priv)
#define CSCHED_DOM(_dom) ((struct csched_dom *) (_dom)->sched_priv)
#define RUNQ(_cpu) (&(CSCHED_PCPU(_cpu)->runq))
+#define CSCHED_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? cpupool_free_cpus : (_pool)->cpu_valid)
/*
@@ -157,10 +161,12 @@ struct csched_private {
struct timer master_ticker;
unsigned int master;
cpumask_t idlers;
+ cpumask_t cpus;
uint32_t weight;
uint32_t credit;
int credit_balance;
uint32_t runq_sort;
+ int ticker_active;
};
@@ -168,8 +174,10 @@ struct csched_private {
* Global variables
*/
static struct csched_private csched_priv;
+static struct csched_private *csched_priv0 = NULL;
static void csched_tick(void *_cpu);
+static void csched_acct(void *dummy);
static inline int
__vcpu_on_runq(struct csched_vcpu *svc)
@@ -214,6 +222,7 @@ __runq_tickle(unsigned int cpu, struct c
{
struct csched_vcpu * const cur =
CSCHED_VCPU(per_cpu(schedule_data, cpu).curr);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
cpumask_t mask;
ASSERT(cur);
@@ -240,14 +249,14 @@ __runq_tickle(unsigned int cpu, struct c
*/
if ( cur->pri > CSCHED_PRI_IDLE )
{
- if ( cpus_empty(csched_priv.idlers) )
+ if ( cpus_empty(prv->idlers) )
{
CSCHED_STAT_CRANK(tickle_idlers_none);
}
else
{
CSCHED_STAT_CRANK(tickle_idlers_some);
- cpus_or(mask, mask, csched_priv.idlers);
+ cpus_or(mask, mask, prv->idlers);
cpus_and(mask, mask, new->vcpu->cpu_affinity);
}
}
@@ -257,38 +266,78 @@ __runq_tickle(unsigned int cpu, struct c
cpumask_raise_softirq(mask, SCHEDULE_SOFTIRQ);
}
-static int
-csched_pcpu_init(int cpu)
+static void
+csched_free_pdata(struct scheduler *ops, void *pcpu, int cpu)
+{
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_pcpu *spc = pcpu;
+ unsigned long flags;
+
+ if ( spc == NULL )
+ return;
+
+ spin_lock_irqsave(&prv->lock, flags);
+
+ prv->credit -= CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus--;
+ cpu_clear(cpu, prv->idlers);
+ cpu_clear(cpu, prv->cpus);
+ if ( (prv->master == cpu) && (prv->ncpus > 0) )
+ {
+ prv->master = first_cpu(prv->cpus);
+ migrate_timer(&prv->master_ticker, prv->master);
+ }
+ kill_timer(&spc->ticker);
+ if ( prv->ncpus == 0 )
+ kill_timer(&prv->master_ticker);
+
+ spin_unlock_irqrestore(&prv->lock, flags);
+
+ xfree(spc);
+}
+
+static void *
+csched_alloc_pdata(struct scheduler *ops, int cpu)
{
struct csched_pcpu *spc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
/* Allocate per-PCPU info */
spc = xmalloc(struct csched_pcpu);
if ( spc == NULL )
- return -1;
-
- spin_lock_irqsave(&csched_priv.lock, flags);
+ return NULL;
+
+ spin_lock_irqsave(&prv->lock, flags);
/* Initialize/update system-wide config */
- csched_priv.credit += CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.ncpus <= cpu )
- csched_priv.ncpus = cpu + 1;
- if ( csched_priv.master >= csched_priv.ncpus )
- csched_priv.master = cpu;
+ prv->credit += CSCHED_CREDITS_PER_ACCT;
+ prv->ncpus++;
+ cpu_set(cpu, prv->cpus);
+ if ( (prv->ncpus == 1) && (prv != csched_priv0) )
+ {
+ prv->master = cpu;
+ init_timer( &prv->master_ticker, csched_acct, prv, cpu);
+ prv->ticker_active = 2;
+ }
init_timer(&spc->ticker, csched_tick, (void *)(unsigned long)cpu, cpu);
+
+ if ( prv == csched_priv0 )
+ prv->master = first_cpu(prv->cpus);
+
INIT_LIST_HEAD(&spc->runq);
- spc->runq_sort_last = csched_priv.runq_sort;
- per_cpu(schedule_data, cpu).sched_priv = spc;
+ spc->runq_sort_last = prv->runq_sort;
+ if ( per_cpu(schedule_data, cpu).sched_priv == NULL )
+ per_cpu(schedule_data, cpu).sched_priv = spc;
/* Start off idling... */
BUG_ON(!is_idle_vcpu(per_cpu(schedule_data, cpu).curr));
- cpu_set(cpu, csched_priv.idlers);
-
- spin_unlock_irqrestore(&csched_priv.lock, flags);
-
- return 0;
+ cpu_set(cpu, prv->idlers);
+
+ spin_unlock_irqrestore(&prv->lock, flags);
+
+ return spc;
}
#ifndef NDEBUG
@@ -361,17 +410,19 @@ __csched_vcpu_is_migrateable(struct vcpu
}
static int
-csched_cpu_pick(struct vcpu *vc)
+csched_cpu_pick(struct scheduler *ops, struct vcpu *vc)
{
cpumask_t cpus;
cpumask_t idlers;
+ cpumask_t online;
int cpu;
/*
* Pick from online CPUs in VCPU's affinity mask, giving a
* preference to its current processor if it's in there.
*/
- cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
+ online = CSCHED_CPUONLINE(vc->domain->cpupool);
+ cpus_and(cpus, online, vc->cpu_affinity);
cpu = cpu_isset(vc->processor, cpus)
? vc->processor
: cycle_cpu(vc->processor, cpus);
@@ -389,7 +440,7 @@ csched_cpu_pick(struct vcpu *vc)
* like run two VCPUs on co-hyperthreads while there are idle cores
* or sockets.
*/
- idlers = csched_priv.idlers;
+ idlers = CSCHED_PRIV(ops)->idlers;
cpu_set(cpu, idlers);
cpus_and(cpus, cpus, idlers);
cpu_clear(cpu, cpus);
@@ -433,12 +484,12 @@ csched_cpu_pick(struct vcpu *vc)
}
static inline void
-__csched_vcpu_acct_start(struct csched_vcpu *svc)
+__csched_vcpu_acct_start(struct csched_private *prv, struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
unsigned long flags;
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&(prv->lock), flags);
if ( list_empty(&svc->active_vcpu_elem) )
{
@@ -449,16 +500,17 @@ __csched_vcpu_acct_start(struct csched_v
list_add(&svc->active_vcpu_elem, &sdom->active_vcpu);
if ( list_empty(&sdom->active_sdom_elem) )
{
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
- csched_priv.weight += sdom->weight;
- }
- }
-
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ list_add(&sdom->active_sdom_elem, &(prv->active_sdom));
+ prv->weight += sdom->weight;
+ }
+ }
+
+ spin_unlock_irqrestore(&(prv->lock), flags);
}
static inline void
-__csched_vcpu_acct_stop_locked(struct csched_vcpu *svc)
+__csched_vcpu_acct_stop_locked(struct csched_private *prv,
+ struct csched_vcpu *svc)
{
struct csched_dom * const sdom = svc->sdom;
@@ -471,16 +523,17 @@ __csched_vcpu_acct_stop_locked(struct cs
list_del_init(&svc->active_vcpu_elem);
if ( list_empty(&sdom->active_vcpu) )
{
- BUG_ON( csched_priv.weight < sdom->weight );
+ BUG_ON( prv->weight < sdom->weight );
list_del_init(&sdom->active_sdom_elem);
- csched_priv.weight -= sdom->weight;
+ prv->weight -= sdom->weight;
}
}
static void
-csched_vcpu_acct(unsigned int cpu)
+csched_vcpu_acct(struct csched_private *prv, unsigned int cpu)
{
struct csched_vcpu * const svc = CSCHED_VCPU(current);
+ struct scheduler *ops = per_cpu(scheduler, cpu);
ASSERT( current->processor == cpu );
ASSERT( svc->sdom != NULL );
@@ -508,9 +561,9 @@ csched_vcpu_acct(unsigned int cpu)
*/
if ( list_empty(&svc->active_vcpu_elem) )
{
- __csched_vcpu_acct_start(svc);
- }
- else if ( csched_cpu_pick(current) != cpu )
+ __csched_vcpu_acct_start(prv, svc);
+ }
+ else if ( csched_cpu_pick(ops, current) != cpu )
{
CSCHED_VCPU_STAT_CRANK(svc, migrate_r);
CSCHED_STAT_CRANK(migrate_running);
@@ -519,34 +572,54 @@ csched_vcpu_acct(unsigned int cpu)
}
}
-static int
-csched_vcpu_init(struct vcpu *vc)
-{
- struct domain * const dom = vc->domain;
- struct csched_dom *sdom = CSCHED_DOM(dom);
+static void *
+csched_alloc_vdata(struct scheduler *ops, struct vcpu *vc)
+{
struct csched_vcpu *svc;
-
- CSCHED_STAT_CRANK(vcpu_init);
/* Allocate per-VCPU info */
svc = xmalloc(struct csched_vcpu);
if ( svc == NULL )
- return -1;
+ return NULL;
INIT_LIST_HEAD(&svc->runq_elem);
INIT_LIST_HEAD(&svc->active_vcpu_elem);
- svc->sdom = sdom;
+ svc->sdom = CSCHED_DOM(vc->domain);
svc->vcpu = vc;
atomic_set(&svc->credit, 0);
svc->flags = 0U;
- svc->pri = is_idle_domain(dom) ? CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
+ svc->pri = is_idle_domain(vc->domain) ?
+ CSCHED_PRI_IDLE : CSCHED_PRI_TS_UNDER;
CSCHED_VCPU_STATS_RESET(svc);
+ return svc;
+}
+
+static void
+csched_vcpu_insert(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu *svc = vc->sched_priv;
+
+ if ( !__vcpu_on_runq(svc) && vcpu_runnable(vc) && !vc->is_running )
+ __runq_insert(vc->processor, svc);
+}
+
+static int
+csched_vcpu_init(struct scheduler *ops, struct vcpu *vc)
+{
+ struct csched_vcpu *svc;
+
+ CSCHED_STAT_CRANK(vcpu_init);
+
+ svc = csched_alloc_vdata(ops, vc);
+ if ( svc == NULL )
+ return -1;
+
vc->sched_priv = svc;
/* Allocate per-PCPU info */
if ( unlikely(!CSCHED_PCPU(vc->processor)) )
{
- if ( csched_pcpu_init(vc->processor) != 0 )
+ if ( csched_alloc_pdata(ops, vc->processor) == NULL )
return -1;
}
@@ -555,29 +628,41 @@ csched_vcpu_init(struct vcpu *vc)
}
static void
-csched_vcpu_destroy(struct vcpu *vc)
+csched_free_vdata(struct scheduler *ops, void *priv)
+{
+ struct csched_private *prv = CSCHED_PRIV(ops);
+ struct csched_vcpu *svc = priv;
+ unsigned long flags;
+
+ if ( __vcpu_on_runq(svc) )
+ __runq_remove(svc);
+
+ spin_lock_irqsave(&(prv->lock), flags);
+
+ if ( !list_empty(&svc->active_vcpu_elem) )
+ __csched_vcpu_acct_stop_locked(prv, svc);
+
+ spin_unlock_irqrestore(&(prv->lock), flags);
+
+ xfree(svc);
+}
+
+static void
+csched_vcpu_destroy(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
struct csched_dom * const sdom = svc->sdom;
- unsigned long flags;
CSCHED_STAT_CRANK(vcpu_destroy);
BUG_ON( sdom == NULL );
BUG_ON( !list_empty(&svc->runq_elem) );
- spin_lock_irqsave(&csched_priv.lock, flags);
-
- if ( !list_empty(&svc->active_vcpu_elem) )
- __csched_vcpu_acct_stop_locked(svc);
-
- spin_unlock_irqrestore(&csched_priv.lock, flags);
-
- xfree(svc);
+ csched_free_vdata(ops, svc);
}
static void
-csched_vcpu_sleep(struct vcpu *vc)
+csched_vcpu_sleep(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
@@ -592,7 +677,7 @@ csched_vcpu_sleep(struct vcpu *vc)
}
static void
-csched_vcpu_wake(struct vcpu *vc)
+csched_vcpu_wake(struct scheduler *ops, struct vcpu *vc)
{
struct csched_vcpu * const svc = CSCHED_VCPU(vc);
const unsigned int cpu = vc->processor;
@@ -648,10 +733,11 @@ csched_vcpu_wake(struct vcpu *vc)
static int
csched_dom_cntl(
- struct domain *d,
+ struct scheduler *ops, struct domain *d,
struct xen_domctl_scheduler_op *op)
{
struct csched_dom * const sdom = CSCHED_DOM(d);
+ struct csched_private *prv = CSCHED_PRIV(ops);
unsigned long flags;
if ( op->cmd == XEN_DOMCTL_SCHEDOP_getinfo )
@@ -663,14 +749,14 @@ csched_dom_cntl(
{
ASSERT(op->cmd == XEN_DOMCTL_SCHEDOP_putinfo);
- spin_lock_irqsave(&csched_priv.lock, flags);
+ spin_lock_irqsave(&(prv->lock), flags);
if ( op->u.credit.weight != 0 )
{
if ( !list_empty(&sdom->active_sdom_elem) )
{
- csched_priv.weight -= sdom->weight;
- csched_priv.weight += op->u.credit.weight;
+ prv->weight -= sdom->weight;
+ prv->weight += op->u.credit.weight;
}
sdom->weight = op->u.credit.weight;
}
@@ -678,14 +764,14 @@ csched_dom_cntl(
if ( op->u.credit.cap != (uint16_t)~0U )
sdom->cap = op->u.credit.cap;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ spin_unlock_irqrestore(&(prv->lock), flags);
}
return 0;
}
static int
-csched_dom_init(struct domain *dom)
+csched_dom_init(struct scheduler *ops, struct domain *dom)
{
struct csched_dom *sdom;
@@ -711,7 +797,7 @@ csched_dom_init(struct domain *dom)
}
static void
-csched_dom_destroy(struct domain *dom)
+csched_dom_destroy(struct scheduler *ops, struct domain *dom)
{
CSCHED_STAT_CRANK(dom_destroy);
xfree(CSCHED_DOM(dom));
@@ -725,7 +811,7 @@ csched_dom_destroy(struct domain *dom)
* remember the last UNDER to make the move up operation O(1).
*/
static void
-csched_runq_sort(unsigned int cpu)
+csched_runq_sort(struct csched_private *prv, unsigned int cpu)
{
struct csched_pcpu * const spc = CSCHED_PCPU(cpu);
struct list_head *runq, *elem, *next, *last_under;
@@ -733,7 +819,7 @@ csched_runq_sort(unsigned int cpu)
unsigned long flags;
int sort_epoch;
- sort_epoch = csched_priv.runq_sort;
+ sort_epoch = prv->runq_sort;
if ( sort_epoch == spc->runq_sort_last )
return;
@@ -768,8 +854,9 @@ csched_runq_sort(unsigned int cpu)
}
static void
-csched_acct(void* dummy)
-{
+csched_acct(void *dummy)
+{
+ struct csched_private *prv = dummy;
unsigned long flags;
struct list_head *iter_vcpu, *next_vcpu;
struct list_head *iter_sdom, *next_sdom;
@@ -786,22 +873,22 @@ csched_acct(void* dummy)
int credit;
- spin_lock_irqsave(&csched_priv.lock, flags);
-
- weight_total = csched_priv.weight;
- credit_total = csched_priv.credit;
+ spin_lock_irqsave(&(prv->lock), flags);
+
+ weight_total = prv->weight;
+ credit_total = prv->credit;
/* Converge balance towards 0 when it drops negative */
- if ( csched_priv.credit_balance < 0 )
- {
- credit_total -= csched_priv.credit_balance;
+ if ( prv->credit_balance < 0 )
+ {
+ credit_total -= prv->credit_balance;
CSCHED_STAT_CRANK(acct_balance);
}
if ( unlikely(weight_total == 0) )
{
- csched_priv.credit_balance = 0;
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ prv->credit_balance = 0;
+ spin_unlock_irqrestore(&(prv->lock), flags);
CSCHED_STAT_CRANK(acct_no_work);
goto out;
}
@@ -813,7 +900,7 @@ csched_acct(void* dummy)
credit_xtra = 0;
credit_cap = 0U;
- list_for_each_safe( iter_sdom, next_sdom, &csched_priv.active_sdom )
+ list_for_each_safe( iter_sdom, next_sdom, &(prv->active_sdom) )
{
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -833,9 +920,9 @@ csched_acct(void* dummy)
* only when the system-wide credit balance is negative.
*/
credit_peak = sdom->active_vcpu_count * CSCHED_CREDITS_PER_ACCT;
- if ( csched_priv.credit_balance < 0 )
- {
- credit_peak += ( ( -csched_priv.credit_balance * sdom->weight) +
+ if ( prv->credit_balance < 0 )
+ {
+ credit_peak += ( ( -prv->credit_balance * sdom->weight) +
(weight_total - 1)
) / weight_total;
}
@@ -877,7 +964,7 @@ csched_acct(void* dummy)
*/
CSCHED_STAT_CRANK(acct_reorder);
list_del(&sdom->active_sdom_elem);
- list_add(&sdom->active_sdom_elem, &csched_priv.active_sdom);
+ list_add(&sdom->active_sdom_elem, &(prv->active_sdom));
}
credit_fair = credit_peak;
@@ -943,7 +1030,7 @@ csched_acct(void* dummy)
/* Upper bound on credits means VCPU stops earning */
if ( credit > CSCHED_CREDITS_PER_TSLICE )
{
- __csched_vcpu_acct_stop_locked(svc);
+ __csched_vcpu_acct_stop_locked(prv, svc);
credit = 0;
atomic_set(&svc->credit, credit);
}
@@ -955,15 +1042,15 @@ csched_acct(void* dummy)
}
}
- csched_priv.credit_balance = credit_balance;
-
- spin_unlock_irqrestore(&csched_priv.lock, flags);
+ prv->credit_balance = credit_balance;
+
+ spin_unlock_irqrestore(&(prv->lock), flags);
/* Inform each CPU that its runq needs to be sorted */
- csched_priv.runq_sort++;
+ prv->runq_sort++;
out:
- set_timer( &csched_priv.master_ticker, NOW() +
+ set_timer( &(prv->master_ticker), NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
}
@@ -972,6 +1059,7 @@ csched_tick(void *_cpu)
{
unsigned int cpu = (unsigned long)_cpu;
struct csched_pcpu *spc = CSCHED_PCPU(cpu);
+ struct csched_private *prv = CSCHED_PRIV(per_cpu(scheduler, cpu));
spc->tick++;
@@ -979,7 +1067,7 @@ csched_tick(void *_cpu)
* Accounting for running VCPU
*/
if ( !is_idle_vcpu(current) )
- csched_vcpu_acct(cpu);
+ csched_vcpu_acct(prv, cpu);
/*
* Check if runq needs to be sorted
@@ -988,7 +1076,7 @@ csched_tick(void *_cpu)
* modified priorities. This is a special O(n) sort and runs at most
* once per accounting period (currently 30 milliseconds).
*/
- csched_runq_sort(cpu);
+ csched_runq_sort(prv, cpu);
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
@@ -1040,10 +1128,12 @@ csched_runq_steal(int peer_cpu, int cpu,
}
static struct csched_vcpu *
-csched_load_balance(int cpu, struct csched_vcpu *snext)
+csched_load_balance(struct csched_private *prv, int cpu,
+ struct csched_vcpu *snext)
{
struct csched_vcpu *speer;
cpumask_t workers;
+ cpumask_t online;
int peer_cpu;
BUG_ON( cpu != snext->vcpu->processor );
@@ -1063,7 +1153,8 @@ csched_load_balance(int cpu, struct csch
* Peek at non-idling CPUs in the system, starting with our
* immediate neighbour.
*/
- cpus_andnot(workers, cpu_online_map, csched_priv.idlers);
+ online = CSCHED_CPUONLINE(per_cpu(cpupool, cpu));
+ cpus_andnot(workers, online, prv->idlers);
cpu_clear(cpu, workers);
peer_cpu = cpu;
@@ -1105,16 +1196,39 @@ csched_load_balance(int cpu, struct csch
* fast for the common case.
*/
static struct task_slice
-csched_schedule(s_time_t now)
+csched_schedule(struct scheduler *ops, s_time_t now)
{
const int cpu = smp_processor_id();
struct list_head * const runq = RUNQ(cpu);
struct csched_vcpu * const scurr = CSCHED_VCPU(current);
+ struct csched_private *prv = CSCHED_PRIV(ops);
struct csched_vcpu *snext;
struct task_slice ret;
CSCHED_STAT_CRANK(schedule);
CSCHED_VCPU_CHECK(current);
+
+ if ( unlikely(!cpu_isset(cpu, CSCHED_CPUONLINE(per_cpu(cpupool, cpu)))) )
+ {
+ struct list_head * iter;
+
+ snext = scurr;
+ if (is_idle_vcpu(current))
+ goto out;
+
+ if ( vcpu_runnable(current) )
+ __runq_insert(cpu, scurr);
+
+ list_for_each(iter, runq)
+ {
+ snext = __runq_elem(iter);
+ if ( snext->pri == CSCHED_PRI_IDLE )
+ break;
+ }
+ BUG_ON( snext->pri != CSCHED_PRI_IDLE );
+ __runq_remove(snext);
+ goto out;
+ }
/*
* Select next runnable local VCPU (ie top of local runq)
@@ -1137,20 +1251,21 @@ csched_schedule(s_time_t now)
if ( snext->pri > CSCHED_PRI_TS_OVER )
__runq_remove(snext);
else
- snext = csched_load_balance(cpu, snext);
-
+ snext = csched_load_balance(prv, cpu, snext);
+
+out:
/*
* Update idlers mask if necessary. When we're idling, other CPUs
* will tickle us when they get extra work.
*/
if ( snext->pri == CSCHED_PRI_IDLE )
{
- if ( !cpu_isset(cpu, csched_priv.idlers) )
- cpu_set(cpu, csched_priv.idlers);
- }
- else if ( cpu_isset(cpu, csched_priv.idlers) )
- {
- cpu_clear(cpu, csched_priv.idlers);
+ if ( !cpu_isset(cpu, prv->idlers) )
+ cpu_set(cpu, prv->idlers);
+ }
+ else if ( cpu_isset(cpu, prv->idlers) )
+ {
+ cpu_clear(cpu, prv->idlers);
}
/*
@@ -1194,7 +1309,7 @@ csched_dump_vcpu(struct csched_vcpu *svc
}
static void
-csched_dump_pcpu(int cpu)
+csched_dump_pcpu(struct scheduler *ops, int cpu)
{
struct list_head *runq, *iter;
struct csched_pcpu *spc;
@@ -1231,9 +1346,10 @@ csched_dump_pcpu(int cpu)
}
static void
-csched_dump(void)
+csched_dump(struct scheduler *ops)
{
struct list_head *iter_sdom, *iter_svc;
+ struct csched_private *prv = CSCHED_PRIV(ops);
int loop;
char idlers_buf[100];
@@ -1250,12 +1366,12 @@ csched_dump(void)
"\tticks per tslice = %d\n"
"\tticks per acct = %d\n"
"\tmigration delay = %uus\n",
- csched_priv.ncpus,
- csched_priv.master,
- csched_priv.credit,
- csched_priv.credit_balance,
- csched_priv.weight,
- csched_priv.runq_sort,
+ prv->ncpus,
+ prv->master,
+ prv->credit,
+ prv->credit_balance,
+ prv->weight,
+ prv->runq_sort,
CSCHED_DEFAULT_WEIGHT,
CSCHED_MSECS_PER_TICK,
CSCHED_CREDITS_PER_TICK,
@@ -1263,12 +1379,12 @@ csched_dump(void)
CSCHED_TICKS_PER_ACCT,
vcpu_migration_delay);
- cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
+ cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), prv->idlers);
printk("idlers: %s\n", idlers_buf);
printk("active vcpus:\n");
loop = 0;
- list_for_each( iter_sdom, &csched_priv.active_sdom )
+ list_for_each( iter_sdom, &(prv->active_sdom) )
{
struct csched_dom *sdom;
sdom = list_entry(iter_sdom, struct csched_dom, active_sdom_elem);
@@ -1284,18 +1400,29 @@ csched_dump(void)
}
}
-static void
-csched_init(void)
-{
- spin_lock_init(&csched_priv.lock);
- INIT_LIST_HEAD(&csched_priv.active_sdom);
- csched_priv.ncpus = 0;
- csched_priv.master = UINT_MAX;
- cpus_clear(csched_priv.idlers);
- csched_priv.weight = 0U;
- csched_priv.credit = 0U;
- csched_priv.credit_balance = 0;
- csched_priv.runq_sort = 0U;
+static int
+csched_init(struct scheduler *ops)
+{
+ struct csched_private *prv;
+
+ prv = xmalloc(struct csched_private);
+ if ( prv == NULL )
+ return 1;
+ if (csched_priv0 == NULL)
+ csched_priv0 = prv;
+ ops->sched_data = prv;
+ spin_lock_init(&(prv->lock));
+ INIT_LIST_HEAD(&(prv->active_sdom));
+ prv->ncpus = 0;
+ prv->master = UINT_MAX;
+ cpus_clear(prv->idlers);
+ prv->weight = 0U;
+ prv->credit = 0U;
+ prv->credit_balance = 0;
+ prv->runq_sort = 0U;
+ prv->ticker_active = (csched_priv0 == prv) ? 0 : 1;
+
+ return 0;
}
/* Tickers cannot be kicked until SMP subsystem is alive. */
@@ -1305,8 +1432,10 @@ static __init int csched_start_tickers(v
unsigned int cpu;
/* Is the credit scheduler initialised? */
- if ( csched_priv.ncpus == 0 )
+ if ( (csched_priv0 == NULL) || (csched_priv0->ncpus == 0) )
return 0;
+
+ csched_priv0->ticker_active = 1;
for_each_online_cpu ( cpu )
{
@@ -1314,45 +1443,70 @@ static __init int csched_start_tickers(v
set_timer(&spc->ticker, NOW() + MILLISECS(CSCHED_MSECS_PER_TICK));
}
- init_timer( &csched_priv.master_ticker, csched_acct, NULL,
- csched_priv.master);
-
- set_timer( &csched_priv.master_ticker, NOW() +
+ init_timer( &(csched_priv0->master_ticker), csched_acct, csched_priv0,
+ csched_priv0->master);
+
+ set_timer( &(csched_priv0->master_ticker), NOW() +
MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT );
return 0;
}
__initcall(csched_start_tickers);
-static void csched_tick_suspend(void)
+static void
+csched_deinit(struct scheduler *ops)
+{
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( prv != NULL )
+ xfree(prv);
+}
+
+static void csched_tick_suspend(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
- spc = CSCHED_PCPU(smp_processor_id());
+ spc = CSCHED_PCPU(cpu);
stop_timer(&spc->ticker);
}
-static void csched_tick_resume(void)
+static void csched_tick_resume(struct scheduler *ops, unsigned int cpu)
{
struct csched_pcpu *spc;
uint64_t now = NOW();
-
- spc = CSCHED_PCPU(smp_processor_id());
+ struct csched_private *prv;
+
+ prv = CSCHED_PRIV(ops);
+ if ( !prv->ticker_active )
+ return;
+
+ spc = CSCHED_PCPU(cpu);
set_timer(&spc->ticker, now + MILLISECS(CSCHED_MSECS_PER_TICK)
- now % MILLISECS(CSCHED_MSECS_PER_TICK) );
+
+ if ( (prv->ticker_active == 2) && (prv->master == cpu) )
+ {
+ set_timer( &prv->master_ticker, now +
+ MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT -
+ now % MILLISECS(CSCHED_MSECS_PER_TICK) * CSCHED_TICKS_PER_ACCT);
+ prv->ticker_active = 1;
+ }
}
struct scheduler sched_credit_def = {
.name = "SMP Credit Scheduler",
.opt_name = "credit",
.sched_id = XEN_SCHEDULER_CREDIT,
+ .sched_data = &csched_priv,
.init_domain = csched_dom_init,
.destroy_domain = csched_dom_destroy,
.init_vcpu = csched_vcpu_init,
+ .insert_vcpu = csched_vcpu_insert,
.destroy_vcpu = csched_vcpu_destroy,
.sleep = csched_vcpu_sleep,
@@ -1366,6 +1520,11 @@ struct scheduler sched_credit_def = {
.dump_cpu_state = csched_dump_pcpu,
.dump_settings = csched_dump,
.init = csched_init,
+ .deinit = csched_deinit,
+ .alloc_vdata = csched_alloc_vdata,
+ .free_vdata = csched_free_vdata,
+ .alloc_pdata = csched_alloc_pdata,
+ .free_pdata = csched_free_pdata,
.tick_suspend = csched_tick_suspend,
.tick_resume = csched_tick_resume,
diff -r 655dc3bc1d8e xen/common/sched_sedf.c
--- a/xen/common/sched_sedf.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/sched_sedf.c Thu Apr 09 14:54:22 2009 +0200
@@ -20,6 +20,9 @@
if ( (_f) <= SEDFLEVEL ) \
printk(_a ); \
} while ( 0 )
+
+#define SEDF_CPUONLINE(_pool) \
+ (((_pool) == NULL) ? cpupool_free_cpus : (_pool)->cpu_valid)
#ifndef NDEBUG
#define SEDF_STATS
@@ -132,7 +135,7 @@ struct sedf_cpu_info {
#define sedf_runnable(edom) (!(EDOM_INFO(edom)->status & SEDF_ASLEEP))
-static void sedf_dump_cpu_state(int i);
+static void sedf_dump_cpu_state(struct scheduler *ops, int i);
static inline int extraq_on(struct vcpu *d, int i)
{
@@ -329,30 +332,17 @@ static inline void __add_to_runqueue_sor
}
-static int sedf_init_vcpu(struct vcpu *v)
+static void *sedf_alloc_vdata(struct scheduler *ops, struct vcpu *v)
{
struct sedf_vcpu_info *inf;
- if ( (v->sched_priv = xmalloc(struct sedf_vcpu_info)) == NULL )
- return -1;
- memset(v->sched_priv, 0, sizeof(struct sedf_vcpu_info));
-
- inf = EDOM_INFO(v);
+ inf = xmalloc(struct sedf_vcpu_info);
+ if ( inf == NULL )
+ return NULL;
+
+ memset(inf, 0, sizeof(struct sedf_vcpu_info));
inf->vcpu = v;
-
- /* Allocate per-CPU context if this is the first domain to be added. */
- if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
- {
- per_cpu(schedule_data, v->processor).sched_priv =
- xmalloc(struct sedf_cpu_info);
- BUG_ON(per_cpu(schedule_data, v->processor).sched_priv == NULL);
- memset(CPU_INFO(v->processor), 0, sizeof(*CPU_INFO(v->processor)));
- INIT_LIST_HEAD(WAITQ(v->processor));
- INIT_LIST_HEAD(RUNQ(v->processor));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_PEN_Q));
- INIT_LIST_HEAD(EXTRAQ(v->processor,EXTRA_UTIL_Q));
- }
-
+
/* Every VCPU gets an equal share of extratime by default. */
inf->deadl_abs = 0;
inf->latency = 0;
@@ -383,19 +373,69 @@ static int sedf_init_vcpu(struct vcpu *v
}
else
{
- EDOM_INFO(v)->deadl_abs = 0;
- EDOM_INFO(v)->status &= ~SEDF_ASLEEP;
- }
-
+ inf->deadl_abs = 0;
+ inf->status &= ~SEDF_ASLEEP;
+ }
+
+ return inf;
+}
+
+static void *
+sedf_alloc_pdata(struct scheduler *ops, int cpu)
+{
+ struct sedf_cpu_info *spc;
+
+ spc = xmalloc(struct sedf_cpu_info);
+ BUG_ON(spc == NULL);
+ memset(spc, 0, sizeof(*spc));
+ INIT_LIST_HEAD(&spc->waitq);
+ INIT_LIST_HEAD(&spc->runnableq);
+ INIT_LIST_HEAD(&spc->extraq[EXTRA_PEN_Q]);
+ INIT_LIST_HEAD(&spc->extraq[EXTRA_UTIL_Q]);
+
+ return (void *)spc;
+}
+
+static void
+sedf_free_pdata(struct scheduler *ops, void *spc, int cpu)
+{
+ if ( spc == NULL )
+ return;
+
+ xfree(spc);
+}
+
+static int sedf_init_vcpu(struct scheduler *ops, struct vcpu *v)
+{
+ struct sedf_vcpu_info *inf;
+
+ /* Allocate per-CPU context if this is the first domain to be added. */
+ if ( unlikely(per_cpu(schedule_data, v->processor).sched_priv == NULL) )
+ {
+ per_cpu(schedule_data, v->processor).sched_priv =
+ sedf_alloc_pdata(ops, v->processor);
+ }
+
+ inf = sedf_alloc_vdata(ops, v);
+ if ( inf == NULL )
+ return -1;
+
+ v->sched_priv = inf;
+
return 0;
}
-static void sedf_destroy_vcpu(struct vcpu *v)
-{
- xfree(v->sched_priv);
-}
-
-static int sedf_init_domain(struct domain *d)
+static void sedf_free_vdata(struct scheduler *ops, void *priv)
+{
+ xfree(priv);
+}
+
+static void sedf_destroy_vcpu(struct scheduler *ops, struct vcpu *v)
+{
+ sedf_free_vdata(ops, v->sched_priv);
+}
+
+static int sedf_init_domain(struct scheduler *ops, struct domain *d)
{
d->sched_priv = xmalloc(struct sedf_dom_info);
if ( d->sched_priv == NULL )
@@ -406,16 +446,18 @@ static int sedf_init_domain(struct domai
return 0;
}
-static void sedf_destroy_domain(struct domain *d)
+static void sedf_destroy_domain(struct scheduler *ops, struct domain *d)
{
xfree(d->sched_priv);
}
-static int sedf_pick_cpu(struct vcpu *v)
+static int sedf_pick_cpu(struct scheduler *ops, struct vcpu *v)
{
cpumask_t online_affinity;
-
- cpus_and(online_affinity, v->cpu_affinity, cpu_online_map);
+ cpumask_t online;
+
+ online = SEDF_CPUONLINE(v->domain->cpupool);
+ cpus_and(online_affinity, v->cpu_affinity, online);
return first_cpu(online_affinity);
}
@@ -751,7 +793,7 @@ static struct task_slice sedf_do_extra_s
-timeslice for the current period used up
-domain on waitqueue has started it's period
-and various others ;) in general: determine which domain to run next*/
-static struct task_slice sedf_do_schedule(s_time_t now)
+static struct task_slice sedf_do_schedule(struct scheduler *ops, s_time_t now)
{
int cpu = smp_processor_id();
struct list_head *runq = RUNQ(cpu);
@@ -786,6 +828,13 @@ static struct task_slice sedf_do_schedul
}
check_waitq:
update_queues(now, runq, waitq);
+
+ if ( unlikely(!cpu_isset(cpu, SEDF_CPUONLINE(per_cpu(cpupool, cpu)))) )
+ {
+ ret.task = IDLETASK(cpu);
+ ret.time = SECONDS(1);
+ goto sched_done;
+ }
/*now simply pick the first domain from the runqueue, which has the
earliest deadline, because the list is sorted*/
@@ -848,7 +897,7 @@ static struct task_slice sedf_do_schedul
}
-static void sedf_sleep(struct vcpu *d)
+static void sedf_sleep(struct scheduler *ops, struct vcpu *d)
{
PRINT(2,"sedf_sleep was called, domain-id %i.%i\n",
d->domain->domain_id, d->vcpu_id);
@@ -1067,7 +1116,7 @@ static inline int should_switch(struct v
return 1;
}
-static void sedf_wake(struct vcpu *d)
+static void sedf_wake(struct scheduler *ops, struct vcpu *d)
{
s_time_t now = NOW();
struct sedf_vcpu_info* inf = EDOM_INFO(d);
@@ -1220,8 +1269,8 @@ static void sedf_dump_domain(struct vcpu
}
-/* dumps all domains on hte specified cpu */
-static void sedf_dump_cpu_state(int i)
+/* dumps all domains on the specified cpu */
+static void sedf_dump_cpu_state(struct scheduler *ops, int i)
{
struct list_head *list, *queue, *tmp;
struct sedf_vcpu_info *d_inf;
@@ -1294,7 +1343,7 @@ static void sedf_dump_cpu_state(int i)
/* Adjusts periods and slices of the domains accordingly to their weights. */
-static int sedf_adjust_weights(struct xen_domctl_scheduler_op *cmd)
+static int sedf_adjust_weights(struct cpupool *c, struct xen_domctl_scheduler_op *cmd)
{
struct vcpu *p;
struct domain *d;
@@ -1315,6 +1364,8 @@ static int sedf_adjust_weights(struct xe
rcu_read_lock(&domlist_read_lock);
for_each_domain( d )
{
+ if ( c != d->cpupool )
+ continue;
for_each_vcpu( d, p )
{
if ( EDOM_INFO(p)->weight )
@@ -1366,7 +1417,7 @@ static int sedf_adjust_weights(struct xe
/* set or fetch domain scheduling parameters */
-static int sedf_adjust(struct domain *p, struct xen_domctl_scheduler_op *op)
+static int sedf_adjust(struct scheduler *ops, struct domain *p, struct xen_domctl_scheduler_op *op)
{
struct vcpu *v;
int rc;
@@ -1425,7 +1476,7 @@ static int sedf_adjust(struct domain *p,
}
}
- rc = sedf_adjust_weights(op);
+ rc = sedf_adjust_weights(p->cpupool, op);
if ( rc )
return rc;
@@ -1463,6 +1514,11 @@ struct scheduler sched_sedf_def = {
.init_vcpu = sedf_init_vcpu,
.destroy_vcpu = sedf_destroy_vcpu,
+
+ .alloc_vdata = sedf_alloc_vdata,
+ .free_vdata = sedf_free_vdata,
+ .alloc_pdata = sedf_alloc_pdata,
+ .free_pdata = sedf_free_pdata,
.do_schedule = sedf_do_schedule,
.pick_cpu = sedf_pick_cpu,
diff -r 655dc3bc1d8e xen/common/schedule.c
--- a/xen/common/schedule.c Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/common/schedule.c Thu Apr 16 09:18:40 2009 +0200
@@ -55,6 +55,7 @@ static void poll_timer_fn(void *data);
/* This is global for now so that private implementations can reach it */
DEFINE_PER_CPU(struct schedule_data, schedule_data);
+DEFINE_PER_CPU(struct scheduler *, scheduler);
extern struct scheduler sched_sedf_def;
extern struct scheduler sched_credit_def;
@@ -66,9 +67,15 @@ static struct scheduler *schedulers[] =
static struct scheduler ops;
-#define SCHED_OP(fn, ...) \
- (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ ) \
- : (typeof(ops.fn(__VA_ARGS__)))0 )
+#define SCHED_OP(opsptr, fn, ...) \
+ (( (opsptr)->fn != NULL ) ? (opsptr)->fn(opsptr, ##__VA_ARGS__ ) \
+ : (typeof((opsptr)->fn(opsptr, ##__VA_ARGS__)))0 )
+
+#define DOM2OP(_d) (((_d)->cpupool == NULL) ? &ops : &((_d)->cpupool->sched))
+#define VCPU2OP(_v) (DOM2OP((_v)->domain))
+#define VCPU2ONLINE(_v) \
+ (((_v)->domain->cpupool == NULL) ? cpu_online_map \
+ : (_v)->domain->cpupool->cpu_valid)
static inline void trace_runstate_change(struct vcpu *v, int new_state)
{
@@ -182,7 +189,13 @@ int sched_init_vcpu(struct vcpu *v, unsi
TRACE_2D(TRC_SCHED_DOM_ADD, v->domain->domain_id, v->vcpu_id);
- return SCHED_OP(init_vcpu, v);
+ if ( SCHED_OP(DOM2OP(d), init_vcpu, v) != 0 )
+ return 1;
+
+ if ( is_idle_domain(d) )
+ per_cpu(schedule_data, v->processor).sched_idlevpriv = v->sched_priv;
+
+ return 0;
}
void sched_destroy_vcpu(struct vcpu *v)
@@ -190,17 +203,47 @@ void sched_destroy_vcpu(struct vcpu *v)
kill_timer(&v->periodic_timer);
kill_timer(&v->singleshot_timer);
kill_timer(&v->poll_timer);
- SCHED_OP(destroy_vcpu, v);
+ SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
+}
+
+void sched_move_domain(struct domain *d, struct cpupool *c)
+{
+ struct vcpu *v;
+ unsigned int new_p;
+
+ domain_pause(d);
+
+ new_p = first_cpu(c->cpu_valid);
+ for_each_vcpu ( d, v )
+ {
+ migrate_timer(&v->periodic_timer, new_p);
+ migrate_timer(&v->singleshot_timer, new_p);
+ migrate_timer(&v->poll_timer, new_p);
+
+ SCHED_OP(VCPU2OP(v), destroy_vcpu, v);
+
+ cpus_setall(v->cpu_affinity);
+ v->processor = new_p;
+ SCHED_OP(&(c->sched), init_vcpu, v);
+
+ new_p = next_cpu(new_p, c->cpu_valid);
+ if ( new_p == NR_CPUS )
+ new_p = first_cpu(c->cpu_valid);
+ }
+
+ d->cpupool = c;
+
+ domain_unpause(d);
}
int sched_init_domain(struct domain *d)
{
- return SCHED_OP(init_domain, d);
+ return SCHED_OP(DOM2OP(d), init_domain, d);
}
void sched_destroy_domain(struct domain *d)
{
- SCHED_OP(destroy_domain, d);
+ SCHED_OP(DOM2OP(d), destroy_domain, d);
}
void vcpu_sleep_nosync(struct vcpu *v)
@@ -214,7 +257,7 @@ void vcpu_sleep_nosync(struct vcpu *v)
if ( v->runstate.state == RUNSTATE_runnable )
vcpu_runstate_change(v, RUNSTATE_offline, NOW());
- SCHED_OP(sleep, v);
+ SCHED_OP(VCPU2OP(v), sleep, v);
}
vcpu_schedule_unlock_irqrestore(v, flags);
@@ -242,7 +285,7 @@ void vcpu_wake(struct vcpu *v)
{
if ( v->runstate.state >= RUNSTATE_blocked )
vcpu_runstate_change(v, RUNSTATE_runnable, NOW());
- SCHED_OP(wake, v);
+ SCHED_OP(VCPU2OP(v), wake, v);
}
else if ( !test_bit(_VPF_blocked, &v->pause_flags) )
{
@@ -297,7 +340,7 @@ static void vcpu_migrate(struct vcpu *v)
/* Switch to new CPU, then unlock old CPU. */
old_cpu = v->processor;
- v->processor = SCHED_OP(pick_cpu, v);
+ v->processor = SCHED_OP(VCPU2OP(v), pick_cpu, v);
spin_unlock_irqrestore(
&per_cpu(schedule_data, old_cpu).schedule_lock, flags);
@@ -326,22 +369,32 @@ void vcpu_force_reschedule(struct vcpu *
}
/*
- * This function is used by cpu_hotplug code from stop_machine context.
- * Hence we can avoid needing to take the
+ * This function is used by cpu_hotplug code from stop_machine context
+ * and from cpupools to switch schedulers on a cpu.
*/
-void cpu_disable_scheduler(void)
+int cpu_disable_scheduler(unsigned int cpu, int lock)
{
struct domain *d;
struct vcpu *v;
- unsigned int cpu = smp_processor_id();
+ struct cpupool *c;
+ int ret = 0;
+
+ c = per_cpu(cpupool, cpu);
+ if ( c == NULL )
+ return ret;
for_each_domain ( d )
{
+ if ( (d->cpupool != c) || c->pool_paused )
+ continue;
+
for_each_vcpu ( d, v )
{
if ( is_idle_vcpu(v) )
continue;
+ if ( lock != 0 )
+ vcpu_schedule_lock_irq(v);
if ( (cpus_weight(v->cpu_affinity) == 1) &&
cpu_isset(cpu, v->cpu_affinity) )
{
@@ -351,29 +404,49 @@ void cpu_disable_scheduler(void)
}
/*
- * Migrate single-shot timers to CPU0. A new cpu will automatically
- * be chosen when the timer is next re-set.
+ * Migrate single-shot timers to other cpu of same pool. A new cpu
+ * will automatically be chosen when the timer is next re-set.
*/
if ( v->singleshot_timer.cpu == cpu )
- migrate_timer(&v->singleshot_timer, 0);
+ {
+ int cpu_mig;
+
+ cpu_mig = first_cpu(c->cpu_valid);
+ if (cpu_mig == cpu)
+ cpu_mig = next_cpu(cpu_mig, c->cpu_valid);
+ migrate_timer(&v->singleshot_timer, cpu_mig);
+ }
if ( v->processor == cpu )
{
set_bit(_VPF_migrating, &v->pause_flags);
+ if ( lock != 0 )
+ vcpu_schedule_unlock_irq(v);
vcpu_sleep_nosync(v);
vcpu_migrate(v);
}
+ else if ( lock != 0 )
+ vcpu_schedule_unlock_irq(v);
+ /*
+ * A vcpu active in the hypervisor will not be migratable.
+ * The caller should try again after releasing and reacquiring
+ * all locks.
+ */
+ if ( v->processor == cpu )
+ ret = -EAGAIN;
}
}
+ return ret;
}
static int __vcpu_set_affinity(
struct vcpu *v, cpumask_t *affinity,
bool_t old_lock_status, bool_t new_lock_status)
{
- cpumask_t online_affinity, old_affinity;
-
- cpus_and(online_affinity, *affinity, cpu_online_map);
+ cpumask_t online, online_affinity, old_affinity;
+
+ online = VCPU2ONLINE(v);
+ cpus_and(online_affinity, *affinity, online);
if ( cpus_empty(online_affinity) )
return -EINVAL;
@@ -424,12 +497,13 @@ int vcpu_locked_change_affinity(struct v
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
{
- cpumask_t online_affinity;
+ cpumask_t online, online_affinity;
/* Do not fail if no CPU in old affinity mask is online. */
- cpus_and(online_affinity, *affinity, cpu_online_map);
+ online = VCPU2ONLINE(v);
+ cpus_and(online_affinity, *affinity, online);
if ( cpus_empty(online_affinity) )
- *affinity = cpu_online_map;
+ *affinity = VCPU2ONLINE(v);
if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
BUG();
@@ -721,7 +795,7 @@ long sched_adjust(struct domain *d, stru
struct vcpu *v;
long ret;
- if ( (op->sched_id != ops.sched_id) ||
+ if ( (op->sched_id != DOM2OP(d)->sched_id) ||
((op->cmd != XEN_DOMCTL_SCHEDOP_putinfo) &&
(op->cmd != XEN_DOMCTL_SCHEDOP_getinfo)) )
return -EINVAL;
@@ -748,7 +822,7 @@ long sched_adjust(struct domain *d, stru
if ( d == current->domain )
vcpu_schedule_lock_irq(current);
- if ( (ret = SCHED_OP(adjust, d, op)) == 0 )
+ if ( (ret = SCHED_OP(DOM2OP(d), adjust, d, op)) == 0 )
TRACE_1D(TRC_SCHED_ADJDOM, d->domain_id);
if ( d == current->domain )
@@ -796,6 +870,7 @@ static void schedule(void)
{
struct vcpu *prev = current, *next = NULL;
s_time_t now = NOW();
+ struct scheduler *sched = this_cpu(scheduler);
struct schedule_data *sd;
struct task_slice next_slice;
@@ -811,7 +886,7 @@ static void schedule(void)
stop_timer(&sd->s_timer);
/* get policy-specific decision on scheduling... */
- next_slice = ops.do_schedule(now);
+ next_slice = sched->do_schedule(sched, now);
next = next_slice.task;
@@ -911,18 +986,25 @@ static void poll_timer_fn(void *data)
vcpu_unblock(v);
}
+/* Get scheduler by id */
+struct scheduler *scheduler_get_by_id(unsigned int id)
+{
+ int i;
+
+ for ( i = 0; schedulers[i] != NULL; i++ )
+ {
+ if ( schedulers[i]->sched_id == id )
+ return schedulers[i];
+ }
+ return NULL;
+}
+
/* Initialise the data structures. */
void __init scheduler_init(void)
{
int i;
open_softirq(SCHEDULE_SOFTIRQ, schedule);
-
- for_each_cpu ( i )
- {
- spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
- init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
- }
for ( i = 0; schedulers[i] != NULL; i++ )
{
@@ -934,43 +1016,121 @@ void __init scheduler_init(void)
if ( schedulers[i] == NULL )
printk("Could not find scheduler: %s\n", opt_sched);
+ for_each_cpu ( i )
+ {
+ per_cpu(scheduler, i) = &ops;
+ spin_lock_init(&per_cpu(schedule_data, i).schedule_lock);
+ init_timer(&per_cpu(schedule_data, i).s_timer, s_timer_fn, NULL, i);
+ }
+
printk("Using scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(init);
-}
-
-void dump_runq(unsigned char key)
-{
- s_time_t now = NOW();
- int i;
+ if ( SCHED_OP(&ops, init) )
+ panic("scheduler returned error on init\n");
+}
+
+/* switch scheduler on cpu */
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c)
+{
unsigned long flags;
-
- local_irq_save(flags);
-
- printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
- SCHED_OP(dump_settings);
- printk("sched_smt_power_savings: %s\n",
- sched_smt_power_savings? "enabled":"disabled");
- printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
-
- for_each_online_cpu ( i )
+ struct vcpu *v;
+ void *vpriv = NULL;
+ void *ppriv;
+ void *ppriv_old;
+ struct scheduler *old_ops;
+ struct scheduler *new_ops;
+
+ old_ops = per_cpu(scheduler, cpu);
+ new_ops = (c == NULL) ? &ops : &(c->sched);
+ v = per_cpu(schedule_data, cpu).idle;
+ ppriv = SCHED_OP(new_ops, alloc_pdata, cpu);
+ if ( c != NULL )
+ vpriv = SCHED_OP(new_ops, alloc_vdata, v);
+
+ spin_lock_irqsave(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+ if ( c == NULL )
+ {
+ vpriv = v->sched_priv;
+ v->sched_priv = per_cpu(schedule_data, cpu).sched_idlevpriv;
+ }
+ else
+ {
+ v->sched_priv = vpriv;
+ vpriv = NULL;
+ }
+ SCHED_OP(old_ops, tick_suspend, cpu);
+ per_cpu(scheduler, cpu) = new_ops;
+ ppriv_old = per_cpu(schedule_data, cpu).sched_priv;
+ per_cpu(schedule_data, cpu).sched_priv = ppriv;
+ SCHED_OP(new_ops, tick_resume, cpu);
+ SCHED_OP(new_ops, insert_vcpu, v);
+
+ spin_unlock_irqrestore(&per_cpu(schedule_data, cpu).schedule_lock, flags);
+
+ if ( vpriv != NULL )
+ SCHED_OP(old_ops, free_vdata, vpriv);
+ SCHED_OP(old_ops, free_pdata, ppriv_old, cpu);
+}
+
+/* init scheduler global data */
+int schedule_init_global(char *name, struct scheduler *sched)
+{
+ int i;
+ struct scheduler *data;
+
+ data = &ops;
+ for ( i = 0; (schedulers[i] != NULL) && (name != NULL) ; i++ )
+ {
+ if ( strcmp(schedulers[i]->opt_name, name) == 0 )
+ {
+ data = schedulers[i];
+ break;
+ }
+ }
+ memcpy(sched, data, sizeof(*sched));
+ return SCHED_OP(sched, init);
+}
+
+/* deinitialize scheduler global data */
+void schedule_deinit_global(struct scheduler *sched)
+{
+ SCHED_OP(sched, deinit);
+}
+
+void schedule_dump(struct cpupool *c)
+{
+ int i;
+ struct scheduler *sched;
+ cpumask_t cpus;
+
+ sched = (c == NULL) ? &ops : &(c->sched);
+ cpus = (c == NULL) ? cpupool_free_cpus : c->cpu_valid;
+ printk("Scheduler: %s (%s)\n", sched->name, sched->opt_name);
+ SCHED_OP(sched, dump_settings);
+
+ for_each_cpu_mask (i, cpus)
{
spin_lock(&per_cpu(schedule_data, i).schedule_lock);
printk("CPU[%02d] ", i);
- SCHED_OP(dump_cpu_state, i);
+ SCHED_OP(sched, dump_cpu_state, i);
spin_unlock(&per_cpu(schedule_data, i).schedule_lock);
}
-
- local_irq_restore(flags);
-}
-
-void sched_tick_suspend(void)
-{
- SCHED_OP(tick_suspend);
-}
-
-void sched_tick_resume(void)
-{
- SCHED_OP(tick_resume);
+}
+
+void sched_tick_suspend(unsigned int cpu)
+{
+ struct scheduler *sched;
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_suspend, cpu);
+}
+
+void sched_tick_resume(unsigned int cpu)
+{
+ struct scheduler *sched;
+
+ sched = per_cpu(scheduler, cpu);
+ SCHED_OP(sched, tick_resume, cpu);
}
#ifdef CONFIG_COMPAT
diff -r 655dc3bc1d8e xen/include/public/domctl.h
--- a/xen/include/public/domctl.h Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/include/public/domctl.h Thu Apr 09 11:47:18 2009 +0200
@@ -59,7 +59,11 @@ struct xen_domctl_createdomain {
/* Should domain memory integrity be verifed by tboot during Sx? */
#define _XEN_DOMCTL_CDF_s3_integrity 2
#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
+ /* cpupool is specified (0 otherwise) */
+#define _XEN_DOMCTL_CDF_pool 3
+#define XEN_DOMCTL_CDF_pool (1U<<_XEN_DOMCTL_CDF_pool)
uint32_t flags;
+ uint32_t cpupool;
};
typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
@@ -109,6 +113,7 @@ struct xen_domctl_getdomaininfo {
uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */
uint32_t ssidref;
xen_domain_handle_t handle;
+ uint32_t cpupool;
};
typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
@@ -645,6 +650,30 @@ typedef struct xen_domctl_hvmcontext_par
XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */
} xen_domctl_hvmcontext_partial_t;
DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
+/*
+ * Move domain to specified cpupool.
+ */
+#define XEN_DOMCTL_cpupool_op 56
+#define XEN_DOMCTL_CPUPOOL_OP_CREATE 1 /* C */
+#define XEN_DOMCTL_CPUPOOL_OP_DESTROY 2 /* D */
+#define XEN_DOMCTL_CPUPOOL_OP_INFO 3 /* I */
+#define XEN_DOMCTL_CPUPOOL_OP_ADDCPU 4 /* A */
+#define XEN_DOMCTL_CPUPOOL_OP_RMCPU 5 /* R */
+#define XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */
+#define XEN_DOMCTL_CPUPOOL_OP_FREEINFO 7 /* F */
+#define XEN_DOMCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF
+struct xen_domctl_cpupool_op {
+ uint32_t op; /* IN */
+ uint32_t cpupool_id; /* IN: CDIARM OUT: CI */
+ uint32_t sched_id; /* IN: C OUT: I */
+ uint32_t domid; /* IN: M */
+ uint32_t cpu; /* IN: AR */
+ uint32_t n_dom; /* OUT: I */
+ struct xenctl_cpumap cpumap; /* OUT: IF */
+};
+typedef struct xen_domctl_cpupool_op xen_domctl_cpupool_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpupool_op_t);
struct xen_domctl {
@@ -688,6 +717,7 @@ struct xen_domctl {
struct xen_domctl_set_target set_target;
struct xen_domctl_subscribe subscribe;
struct xen_domctl_debug_op debug_op;
+ struct xen_domctl_cpupool_op cpupool_op;
#if defined(__i386__) || defined(__x86_64__)
struct xen_domctl_cpuid cpuid;
#endif
diff -r 655dc3bc1d8e xen/include/xen/sched-if.h
--- a/xen/include/xen/sched-if.h Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/include/xen/sched-if.h Thu Apr 16 09:16:18 2009 +0200
@@ -10,15 +10,24 @@
#include <xen/percpu.h>
+/* A global pointer to the initial cpupool (POOL0). */
+extern struct cpupool *cpupool0;
+
+/* cpus currently in no cpupool */
+extern cpumask_t cpupool_free_cpus;
+
struct schedule_data {
spinlock_t schedule_lock; /* spinlock protecting curr */
struct vcpu *curr; /* current task */
struct vcpu *idle; /* idle task for this cpu */
void *sched_priv;
+ void *sched_idlevpriv; /* default scheduler vcpu data */
struct timer s_timer; /* scheduling timer */
} __cacheline_aligned;
DECLARE_PER_CPU(struct schedule_data, schedule_data);
+DECLARE_PER_CPU(struct scheduler *, scheduler);
+DECLARE_PER_CPU(struct cpupool *, cpupool);
static inline void vcpu_schedule_lock(struct vcpu *v)
{
@@ -58,28 +67,50 @@ struct scheduler {
char *name; /* full name for this scheduler */
char *opt_name; /* option name for this scheduler */
unsigned int sched_id; /* ID for this scheduler */
+ void *sched_data; /* global data pointer */
- void (*init) (void);
+ int (*init) (struct scheduler *);
+ void (*deinit) (struct scheduler *);
- int (*init_domain) (struct domain *);
- void (*destroy_domain) (struct domain *);
+ void (*free_vdata) (struct scheduler *, void *);
+ void * (*alloc_vdata) (struct scheduler *, struct vcpu *);
+ void (*free_pdata) (struct scheduler *, void *, int);
+ void * (*alloc_pdata) (struct scheduler *, int);
- int (*init_vcpu) (struct vcpu *);
- void (*destroy_vcpu) (struct vcpu *);
+ int (*init_domain) (struct scheduler *, struct domain *);
+ void (*destroy_domain) (struct scheduler *, struct domain *);
- void (*sleep) (struct vcpu *);
- void (*wake) (struct vcpu *);
+ int (*init_vcpu) (struct scheduler *, struct vcpu *);
+ void (*insert_vcpu) (struct scheduler *, struct vcpu *);
+ void (*destroy_vcpu) (struct scheduler *, struct vcpu *);
- struct task_slice (*do_schedule) (s_time_t);
+ void (*sleep) (struct scheduler *, struct vcpu *);
+ void (*wake) (struct scheduler *, struct vcpu *);
- int (*pick_cpu) (struct vcpu *);
- int (*adjust) (struct domain *,
+ struct task_slice (*do_schedule) (struct scheduler *, s_time_t);
+
+ int (*pick_cpu) (struct scheduler *, struct vcpu *);
+ int (*adjust) (struct scheduler *, struct domain *,
struct xen_domctl_scheduler_op *);
- void (*dump_settings) (void);
- void (*dump_cpu_state) (int);
+ void (*dump_settings) (struct scheduler *);
+ void (*dump_cpu_state) (struct scheduler *, int);
- void (*tick_suspend) (void);
- void (*tick_resume) (void);
+ void (*tick_suspend) (struct scheduler *, unsigned int);
+ void (*tick_resume) (struct scheduler *, unsigned int);
};
+struct cpupool
+{
+ int cpupool_id;
+ cpumask_t cpu_valid; /* all cpus assigned to pool */
+ cpumask_t cpus_borrowed; /* cpus borrowed or lent */
+ struct cpupool *next;
+ unsigned int n_dom;
+ int cpu_in_transit; /* used for adding/removing cpus */
+ unsigned int pool_paused; /* #cpus lent to other pools; domains paused while > 0 */
+ struct scheduler sched;
+};
+
+struct scheduler *scheduler_get_by_id(unsigned int id);
+
#endif /* __XEN_SCHED_IF_H__ */
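To illustrate how a scheduler implementation adapts to the instance-based interface above, a purely hypothetical skeleton (names invented, not taken from this patch) would keep its formerly file-scope globals behind sched_data:

/* hypothetical scheduler skeleton, for illustration only */
struct example_priv {
    spinlock_t lock;
    /* per-instance runqueues etc. would live here */
};

static int example_init(struct scheduler *ops)
{
    struct example_priv *prv = xmalloc(struct example_priv);

    if ( prv == NULL )
        return -ENOMEM;
    memset(prv, 0, sizeof(*prv));
    spin_lock_init(&prv->lock);
    ops->sched_data = prv;   /* replaces former file-scope globals */
    return 0;
}

static void example_deinit(struct scheduler *ops)
{
    xfree(ops->sched_data);
    ops->sched_data = NULL;
}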
diff -r 655dc3bc1d8e xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Thu Apr 16 11:54:06 2009 +0100
+++ b/xen/include/xen/sched.h Thu Apr 16 09:14:00 2009 +0200
@@ -182,6 +182,7 @@ struct domain
/* Scheduling. */
void *sched_priv; /* scheduler-specific data */
+ struct cpupool *cpupool;
struct domain *next_in_list;
struct domain *next_in_hashbucket;
@@ -341,7 +342,7 @@ static inline struct domain *get_current
}
struct domain *domain_create(
- domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
+ domid_t domid, int poolid, unsigned int domcr_flags, ssidref_t ssidref);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm 0
#define DOMCRF_hvm (1U<<_DOMCRF_hvm)
@@ -426,10 +427,11 @@ void sched_destroy_vcpu(struct vcpu *v);
void sched_destroy_vcpu(struct vcpu *v);
int sched_init_domain(struct domain *d);
void sched_destroy_domain(struct domain *d);
+void sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
int sched_id(void);
-void sched_tick_suspend(void);
-void sched_tick_resume(void);
+void sched_tick_suspend(unsigned int cpu);
+void sched_tick_resume(unsigned int cpu);
void vcpu_wake(struct vcpu *d);
void vcpu_sleep_nosync(struct vcpu *d);
void vcpu_sleep_sync(struct vcpu *d);
@@ -533,8 +535,13 @@ void domain_unpause_by_systemcontroller(
void domain_unpause_by_systemcontroller(struct domain *d);
void cpu_init(void);
+struct scheduler;
+
+int schedule_init_global(char *name, struct scheduler *sched);
+void schedule_deinit_global(struct scheduler *sched);
+void schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
-void cpu_disable_scheduler(void);
+int cpu_disable_scheduler(unsigned int cpu, int lock);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
@@ -560,6 +567,21 @@ extern enum cpufreq_controller {
extern enum cpufreq_controller {
FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
+
+#define CPUPOOLID_NONE -1
+
+struct cpupool *cpupool_create(int poolid, char *sched);
+int cpupool_destroy(struct cpupool *c);
+int cpupool0_cpu_assign(struct cpupool *c);
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu);
+void cpupool_cpu_add(unsigned int cpu);
+int cpupool_cpu_remove(unsigned int cpu);
+int cpupool_borrow_cpu(struct cpupool *c, unsigned int cpu);
+int cpupool_return_cpu(struct cpupool *c);
+int cpupool_add_domain(struct domain *d, int poolid);
+void cpupool_rm_domain(struct domain *d);
+int cpupool_do_domctl(struct xen_domctl *op);
+#define num_cpupool_cpus(c) (cpus_weight((c)->cpu_valid))
#endif /* __SCHED_H__ */
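A call site of the extended creation interface now names the pool explicitly, e.g. (sketch only; the real call sites are in the domain creation paths of the patch, not shown in this excerpt):

/* hypothetical call site: create a domain in cpupool 'poolid',
 * or pass CPUPOOLID_NONE to skip the pool assignment */
d = domain_create(domid, poolid, domcr_flags, ssidref);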
diff -r 655dc3bc1d8e xen/common/cpupool.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/common/cpupool.c Fri Apr 17 11:01:51 2009 +0200
@@ -0,0 +1,698 @@
+/******************************************************************************
+ * cpupool.c
+ *
+ * Generic cpupool-handling functions.
+ *
+ * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
+ */
+
+#include <xen/lib.h>
+#include <xen/init.h>
+#include <xen/cpumask.h>
+#include <xen/percpu.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+
+/* #define PRINTD(args...) printk(args) */
+#define PRINTD(args...) ((void)0)    /* no-op unless the printk variant above is used */
+
+#define for_each_cpupool(ptr) \
+ for ((ptr) = &cpupool_list; *(ptr) != NULL; (ptr) = &((*(ptr))->next))
+
+struct cpupool *cpupool0;
+cpumask_t cpupool_free_cpus;
+cpumask_t cpupool_free_cpus_borrowed;
+
+static struct cpupool *cpupool_list; /* linked list, sorted by poolid */
+
+static int cpupool0_max_cpus;
+integer_param("pool0_max_cpus", cpupool0_max_cpus);
+
+static DEFINE_SPINLOCK(cpupool_lock);
+
+DEFINE_PER_CPU(struct cpupool *, cpupool);
+
+static struct cpupool *alloc_cpupool_struct(void)
+{
+ return xmalloc(struct cpupool);
+}
+
+static void free_cpupool_struct(struct cpupool *c)
+{
+ xfree(c);
+}
+
+/*
+ * find a cpupool by its id. to be called with cpupool lock held,
+ * returns NULL if not found.
+ */
+static struct cpupool *cpupool_find_by_id(int id, int exact)
+{
+ struct cpupool **q;
+
+ for_each_cpupool(q)
+ {
+ if ( (*q)->cpupool_id == id )
+ return *q;
+ if ( (*q)->cpupool_id > id )
+ break;
+ }
+ return exact ? NULL : *q;
+}
+
+/*
+ * create a new cpupool with specified poolid
+ * returns pointer to new cpupool structure if okay, NULL otherwise
+ * possible failures:
+ * - no memory
+ * - poolid already used
+ * - unknown scheduler
+ */
+struct cpupool *cpupool_create(int poolid, char *sched)
+{
+ struct cpupool *c;
+ struct cpupool **q;
+ int last = 0;
+
+ if ( (c = alloc_cpupool_struct()) == NULL )
+ return NULL;
+ memset(c, 0, sizeof(*c));
+
+ PRINTD("cpupool_create(%d,%s)\n", poolid, sched);
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ {
+ last = (*q)->cpupool_id;
+ if ( (poolid != CPUPOOLID_NONE) && (last >= poolid) )
+ break;
+ }
+ if ( *q != NULL )
+ {
+ if ( (*q)->cpupool_id == poolid )
+ {
+ spin_unlock(&cpupool_lock);
+ free_cpupool_struct(c);
+ return NULL;
+ }
+ c->next = *q;
+ }
+ *q = c;
+ c->cpupool_id = (poolid == CPUPOOLID_NONE) ? (last + 1) : poolid;
+ c->cpu_in_transit = -1;
+ if ( schedule_init_global(sched, &(c->sched)) )
+ {
+ spin_unlock(&cpupool_lock);
+ cpupool_destroy(c);
+ return NULL;
+ }
+ spin_unlock(&cpupool_lock);
+
+ printk("Created cpupool %d with scheduler %s (%s)\n", c->cpupool_id,
+ c->sched.name, c->sched.opt_name);
+
+ return c;
+}
+
+/*
+ * destroys the given cpupool
+ * returns 0 on success, 1 otherwise
+ * possible failures:
+ * - pool still in use
+ * - cpus still assigned to pool
+ * - pool not in list
+ */
+int cpupool_destroy(struct cpupool *c)
+{
+ struct cpupool **q;
+
+ spin_lock(&cpupool_lock);
+ for_each_cpupool(q)
+ if ( *q == c )
+ break;
+ if ( (*q != c) || (c->n_dom != 0) || cpus_weight(c->cpu_valid) )
+ {
+ spin_unlock(&cpupool_lock);
+ return 1;
+ }
+ *q = c->next;
+ spin_unlock(&cpupool_lock);
+ PRINTD("cpupool_destroy(%d)\n", c->cpupool_id);
+ schedule_deinit_global(&(c->sched));
+ free_cpupool_struct(c);
+ return 0;
+}
+
+/*
+ * assign a specific cpu to a cpupool
+ */
+static void cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+ PRINTD("cpupool_assign_cpu(%d,%d)\n", c->cpupool_id, cpu);
+ per_cpu(cpupool, cpu) = c;
+ schedule_cpu_switch(cpu, c);
+ cpu_clear(cpu, cpupool_free_cpus);
+ cpu_set(cpu, c->cpu_valid);
+ PRINTD("cpupool_assign_cpu(%d,%d) ready\n", c->cpupool_id, cpu);
+}
+
+/*
+ * assign free physical cpus to a cpupool
+ * cpus assigned are unused cpus with lowest possible ids
+ * returns the number of cpus assigned
+ */
+int cpupool_assign_ncpu(struct cpupool *c, int ncpu)
+{
+ int i;
+ int n;
+
+ n = 0;
+ spin_lock(&cpupool_lock);
+ for_each_cpu_mask(i, cpupool_free_cpus)
+ {
+ cpupool_assign_cpu_locked(c, i);
+ n++;
+ if ( n == ncpu )
+ break;
+ }
+ spin_unlock(&cpupool_lock);
+ PRINTD("cpupool_assign_ncpu(%d,%d) rc %d\n", c->cpupool_id, ncpu, n);
+ return n;
+}
+
+static void cpupool_unassign_cpu_locked_1(struct cpupool *c, unsigned int cpu)
+{
+ PRINTD("cpupool_unassign_cpu(%d,%d)\n", c->cpupool_id, cpu);
+ c->cpu_in_transit = cpu;
+}
+
+static int cpupool_unassign_cpu_locked_2(struct cpupool *c)
+{
+ uint64_t to = NOW() + MILLISECS(100);
+ int cpu = c->cpu_in_transit;
+ int ret;
+
+ cpu_clear(cpu, c->cpu_valid);
+ while ( ((ret = cpu_disable_scheduler(cpu, 1)) != 0) && (NOW() < to) );
+ if ( ret )
+ {
+ cpu_set(cpu, c->cpu_valid);
+ c->cpu_in_transit = -1;
+ }
+ else
+ {
+ c->cpu_in_transit = -1;
+ cpu_set(cpu, cpupool_free_cpus);
+ schedule_cpu_switch(cpu, NULL);
+ per_cpu(cpupool, cpu) = NULL;
+ }
+ PRINTD("cpupool_unassign_cpu(%d,%d) ret %d\n", c->cpupool_id, cpu, ret);
+ return ret;
+}
+
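+/*
+ * second half of cpu unassignment, run via continue_hypercall_on_cpu;
+ * note: releases the cpupool_lock still held by cpupool_unassign_cpu()
+ */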
+static long cpupool_unassign_cpu_helper(void *info)
+{
+ struct cpupool *c = (struct cpupool *)info;
+ long ret;
+
+ ret = cpupool_unassign_cpu_locked_2(c);
+ spin_unlock(&cpupool_lock);
+ return ret;
+}
+
+static int cpupool_unassign_cpu_locked(struct cpupool *c, unsigned int cpu)
+{
+ cpupool_unassign_cpu_locked_1(c, cpu);
+ return cpupool_unassign_cpu_locked_2(c);
+}
+
+/*
+ * unassign a specific cpu from a cpupool
+ * possible failures:
+ * - last cpu and still domains in cpupool
+ */
+int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
+{
+ int work_cpu;
+
+ spin_lock(&cpupool_lock);
+ if ( !cpu_isset(cpu, c->cpu_valid) )
+ {
+ spin_unlock(&cpupool_lock);
+ return 0;
+ }
+ if ( (c->n_dom > 0) && (cpus_weight(c->cpu_valid) == 1) )
+ {
+ spin_unlock(&cpupool_lock);
+ return -EBUSY;
+ }
+ cpupool_unassign_cpu_locked_1(c, cpu);
+ work_cpu = smp_processor_id();
+ if ( work_cpu == cpu )
+ {
+ work_cpu = first_cpu(cpupool0->cpu_valid);
+ if ( work_cpu == cpu )
+ work_cpu = next_cpu(cpu, cpupool0->cpu_valid);
+ }
+ return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
+}
+
+/*
+ * borrow cpu from another cpupool
+ * cpu might be free or already in the correct pool
+ * if the cpu is taken from another pool, all domains of that pool are paused
+ * rc == 0 if not borrowed, 1 if borrowed
+ */
+int cpupool_borrow_cpu(struct cpupool *c, unsigned int cpu)
+{
+ struct cpupool **q;
+ struct domain *d;
+
+ if ( cpu_isset(cpu, c->cpu_valid) )
+ return 0;
+
+ spin_lock(&cpupool_lock);
+
+ if ( cpu_isset(cpu, cpupool_free_cpus) )
+ {
+ cpupool_assign_cpu_locked(c, cpu);
+ cpu_set(cpu, c->cpus_borrowed);
+ cpu_set(cpu, cpupool_free_cpus_borrowed);
+ spin_unlock(&cpupool_lock);
+ return 1;
+ }
+
+ for_each_cpupool(q)
+ {
+ if ( cpu_isset(cpu, (*q)->cpu_valid) )
+ break;
+ }
+ BUG_ON(*q == NULL);
+ if ( (*q)->pool_paused++ == 0 )
+ {
+ for_each_domain(d)
+ {
+ if ( d->cpupool == *q )
+ domain_pause(d);
+ }
+ }
+ /* unassigning cpu can't fail as all domains in pool should be paused */
+ cpupool_unassign_cpu_locked(*q, cpu);
+ cpupool_assign_cpu_locked(c, cpu);
+ cpu_set(cpu, c->cpus_borrowed);
+ cpu_set(cpu, (*q)->cpus_borrowed);
+
+ spin_unlock(&cpupool_lock);
+ return 1;
+}
+
+/*
+ * return previously borrowed cpus
+ * cpus borrowed via cpupool_borrow_cpu are given back to their former pools
+ * returns a cpu to continue the operation on, or -1 if everything is done
+ */
+int cpupool_return_cpu(struct cpupool *c)
+{
+ int cpu = -1;
+ cpumask_t mask;
+ struct cpupool **q;
+ struct domain *d;
+
+ spin_lock(&cpupool_lock);
+ if ( cpus_weight(c->cpus_borrowed) == 0 )
+ goto out;
+
+ if ( cpu_isset(smp_processor_id(), c->cpus_borrowed) )
+ {
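+ /* we are running on a borrowed cpu ourselves: let the caller continue
+ * on a cpu of this pool which is not borrowed */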
+ cpus_andnot(mask, c->cpu_valid, c->cpus_borrowed);
+ cpu = first_cpu(mask);
+ BUG_ON(cpu == NR_CPUS);
+ goto out;
+ }
+
+ for_each_cpu_mask(cpu, c->cpus_borrowed)
+ {
+ BUG_ON(!cpu_isset(cpu, c->cpu_valid));
+ if ( cpu_isset(cpu, cpupool_free_cpus_borrowed) )
+ {
+ cpu_clear(cpu, cpupool_free_cpus_borrowed);
+ cpu_clear(cpu, c->cpus_borrowed);
+ if ( !cpupool_unassign_cpu_locked(c, cpu) )
+ continue;
+ /* could not move all vcpus, try again */
+ cpu_set(cpu, cpupool_free_cpus_borrowed);
+ cpu_set(cpu, c->cpus_borrowed);
+ goto out;
+ }
+ for_each_cpupool(q)
+ {
+ if ( (*q != c) && cpu_isset(cpu, (*q)->cpus_borrowed) )
+ break;
+ }
+ BUG_ON(*q == NULL);
+ BUG_ON(!(*q)->pool_paused);
+ cpu_clear(cpu, (*q)->cpus_borrowed);
+ cpu_clear(cpu, c->cpus_borrowed);
+ if ( cpupool_unassign_cpu_locked(c, cpu) )
+ {
+ cpu_set(cpu, (*q)->cpus_borrowed);
+ cpu_set(cpu, c->cpus_borrowed);
+ goto out;
+ }
+ cpupool_assign_cpu_locked(*q, cpu);
+ if ( (*q)->pool_paused == 1 )
+ {
+ for_each_domain(d)
+ {
+ if ( d->cpupool == *q )
+ domain_unpause(d);
+ }
+ }
+ (*q)->pool_paused--;
+ }
+ cpu = -1;
+
+out:
+ spin_unlock(&cpupool_lock);
+ return cpu;
+}
+
+/*
+ * assign cpus to the default cpupool
+ * by default all cpus are used; fewer can be requested via the
+ * pool0_max_cpus boot parameter
+ * possible failures:
+ * - no cpu assigned
+ */
+int __init cpupool0_cpu_assign(struct cpupool *c)
+{
+ if ( (cpupool0_max_cpus == 0) || (cpupool0_max_cpus > num_online_cpus()) )
+ cpupool0_max_cpus = num_online_cpus();
+ if ( !cpupool_assign_ncpu(c, cpupool0_max_cpus) )
+ return 1;
+ return 0;
+}
+
+/*
+ * add a new domain to a cpupool
+ * possible failures:
+ * - pool does not exist
+ * - pool is paused
+ * - no cpu assigned to pool
+ */
+int cpupool_add_domain(struct domain *d, int poolid)
+{
+ struct cpupool *c;
+ int rc = 1;
+
+ if ( poolid == CPUPOOLID_NONE )
+ return 0;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(poolid, 1);
+ if ( (c != NULL) && !c->pool_paused && cpus_weight(c->cpu_valid) )
+ {
+ c->n_dom++;
+ d->cpupool = c;
+ PRINTD("cpupool_add_domain(%d,%d) n_dom %d\n", d->domain_id, poolid,
+ c->n_dom);
+ rc = 0;
+ }
+ spin_unlock(&cpupool_lock);
+ return rc;
+}
+
+/*
+ * remove a domain from a cpupool
+ */
+void cpupool_rm_domain(struct domain *d)
+{
+ if ( d->cpupool == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ d->cpupool->n_dom--;
+ PRINTD("cpupool_rm_domain(%d,%d) n_dom %d\n", d->domain_id,
+ d->cpupool->cpupool_id, d->cpupool->n_dom);
+ d->cpupool = NULL;
+ spin_unlock(&cpupool_lock);
+ return;
+}
+
+/*
+ * called to add a new cpu to the pool administration
+ * a hotplugged cpu is added to cpupool0 so that it can be assigned to dom0
+ */
+void cpupool_cpu_add(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ if ( cpupool0 == NULL )
+ return;
+ spin_lock(&cpupool_lock);
+ cpu_set(cpu, cpupool_free_cpus);
+ cpupool_assign_cpu_locked(cpupool0, cpu);
+ spin_unlock(&cpupool_lock);
+#endif
+ return;
+}
+
+/* called to remove a cpu from the pool administration
+ * possible failures:
+ * - cpu is last one in a pool with domains in it
+ * - pool is paused
+ */
+int cpupool_cpu_remove(unsigned int cpu)
+{
+ int rc = 0;
+#ifdef CONFIG_HOTPLUG_CPU
+ struct cpupool **q;
+
+ spin_lock(&cpupool_lock);
+ if ( cpu_isset(cpu, cpupool_free_cpus) )
+ {
+ cpu_clear(cpu, cpupool_free_cpus);
+ goto out;
+ }
+ for_each_cpupool(q)
+ if ( cpu_isset(cpu, (*q)->cpu_valid) )
+ break;
+ if ( *q == NULL )
+ goto out;
+ if ( (((*q)->n_dom == 0) || (cpus_weight((*q)->cpu_valid) > 1)) &&
+ !(*q)->pool_paused )
+ {
+ cpu_clear(cpu, (*q)->cpu_valid);
+ schedule_cpu_switch(cpu, NULL);
+ per_cpu(cpupool, cpu) = NULL;
+ }
+ else
+ rc = 1;
+out:
+ spin_unlock(&cpupool_lock);
+#endif
+ return rc;
+}
+
+/*
+ * do cpupool related domctl operations
+ */
+int cpupool_do_domctl(struct xen_domctl *op)
+{
+ int ret;
+ struct cpupool *c;
+
+ switch ( op->u.cpupool_op.op )
+ {
+
+ case XEN_DOMCTL_CPUPOOL_OP_CREATE:
+ {
+ int poolid;
+ struct scheduler *sched;
+
+ poolid = (op->u.cpupool_op.cpupool_id == XEN_DOMCTL_CPUPOOL_PAR_ANY) ?
+ CPUPOOLID_NONE: op->u.cpupool_op.cpupool_id;
+ sched = scheduler_get_by_id(op->u.cpupool_op.sched_id);
+ ret = -ENOENT;
+ if ( sched == NULL )
+ break;
+ ret = 0;
+ c = cpupool_create(poolid, sched->opt_name);
+ if ( c == NULL )
+ ret = -EINVAL;
+ else
+ op->u.cpupool_op.cpupool_id = c->cpupool_id;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_DESTROY:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 1);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ ret = (cpupool_destroy(c) != 0) ? -EBUSY : 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_INFO:
+ {
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ op->u.cpupool_op.cpupool_id = c->cpupool_id;
+ op->u.cpupool_op.sched_id = c->sched.sched_id;
+ op->u.cpupool_op.n_dom = c->n_dom;
+ cpumask_to_xenctl_cpumap(&(op->u.cpupool_op.cpumap), &(c->cpu_valid));
+ ret = 0;
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_ADDCPU:
+ {
+ unsigned int cpu;
+
+ cpu = op->u.cpupool_op.cpu;
+ spin_lock(&cpupool_lock);
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = first_cpu(cpupool_free_cpus);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ goto addcpu_out;
+ ret = -EBUSY;
+ if ( !cpu_isset(cpu, cpupool_free_cpus) )
+ goto addcpu_out;
+ c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 0);
+ ret = -ENOENT;
+ if ( c == NULL )
+ goto addcpu_out;
+ cpupool_assign_cpu_locked(c, cpu);
+ ret = 0;
+addcpu_out:
+ spin_unlock(&cpupool_lock);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_RMCPU:
+ {
+ unsigned int cpu;
+
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 0);
+ spin_unlock(&cpupool_lock);
+ ret = -ENOENT;
+ if ( c == NULL )
+ break;
+ cpu = op->u.cpupool_op.cpu;
+ if ( cpu == XEN_DOMCTL_CPUPOOL_PAR_ANY )
+ cpu = last_cpu(c->cpu_valid);
+ ret = -EINVAL;
+ if ( cpu >= NR_CPUS )
+ break;
+ /* caution: cpupool_unassign_cpu uses continue_hypercall_on_cpu and
+ * will continue after the local return
+ */
+ ret = cpupool_unassign_cpu(c, cpu);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_MOVEDOMAIN:
+ {
+ struct domain *d;
+
+ ret = -EINVAL;
+ if ( op->u.cpupool_op.domid == 0 )
+ break;
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(op->u.cpupool_op.domid);
+ if ( d == NULL )
+ break;
+ if ( d->cpupool == NULL )
+ {
+ ret = -EINVAL;
+ rcu_unlock_domain(d);
+ break;
+ }
+ ret = -ENOENT;
+ spin_lock(&cpupool_lock);
+ c = cpupool_find_by_id(op->u.cpupool_op.cpupool_id, 1);
+ if ( (c != NULL) && cpus_weight(c->cpu_valid) && !c->pool_paused )
+ {
+ PRINTD("cpupool move_domain(%d)->%d\n", d->domain_id,
+ c->cpupool_id);
+ d->cpupool->n_dom--;
+ PRINTD("cpupool move_domain(%d), %d.n_dom=%d\n", d->domain_id,
+ d->cpupool->cpupool_id, d->cpupool->n_dom);
+ sched_move_domain(d, c);
+ c->n_dom++;
+ PRINTD("cpupool move_domain(%d), %d.n_dom=%d\n", d->domain_id,
+ c->cpupool_id, c->n_dom);
+ PRINTD("cpupool move_domain(%d)->%d ready\n", d->domain_id,
+ c->cpupool_id);
+ ret = 0;
+ }
+ spin_unlock(&cpupool_lock);
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_CPUPOOL_OP_FREEINFO:
+ {
+ cpumask_to_xenctl_cpumap(&(op->u.cpupool_op.cpumap),
+ &cpupool_free_cpus);
+ ret = 0;
+ }
+ break;
+
+ default:
+ ret = -ENOSYS;
+ break;
+
+ }
+
+ return ret;
+}
+
+void schedule_dump(struct cpupool *c);
+
+void dump_runq(unsigned char key)
+{
+ unsigned long flags;
+ s_time_t now = NOW();
+ struct cpupool **c;
+
+ spin_lock(&cpupool_lock);
+ local_irq_save(flags);
+
+ printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
+
+ printk("Idle cpupool:\n");
+ schedule_dump(NULL);
+
+ for_each_cpupool(c)
+ {
+ printk("Cpupool %d:\n", (*c)->cpupool_id);
+ schedule_dump(*c);
+ }
+
+ local_irq_restore(flags);
+ spin_unlock(&cpupool_lock);
+}
+
+static int __init cpupool_init(void)
+{
+ cpupool_free_cpus = cpu_online_map;
+ cpus_clear(cpupool_free_cpus_borrowed);
+ cpupool_list = NULL;
+ return 0;
+}
+__initcall(cpupool_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
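For completeness: bringing up the initial pool at boot (done in the scheduler initialisation, not visible in this excerpt) boils down to roughly the following sketch; opt_sched is assumed to be the existing "sched=" boot option string:

/* hypothetical boot-time sketch: create pool 0 with the default scheduler
 * and assign the boot cpus to it */
cpupool0 = cpupool_create(0, opt_sched);
BUG_ON(cpupool0 == NULL);
BUG_ON(cpupool0_cpu_assign(cpupool0) != 0);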
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel