Keir Fraser wrote:
> On 13/04/2010 14:49, "Juergen Gross" <juergen.gross@xxxxxxxxxxxxxx> wrote:
>
>>>> 1. make continue_hypercall_on_cpu available on ia64, too
>>>> 2. make cpupools a pure x86 feature
>>> Is there anything meaningful in the new code that's really x86-specific
>>> (i.e. can't the whole code chunk be moved to xen/common/)?
>> My main concern is the difference in the handling of schedule_tail. I just
>> can't tell how to change the ia64 variant to make it work correctly.
>
> We were messing with schedule_tail in order to get control into the
> hypercall continuation in the context of the same vcpu but on a different
> pcpu. Now that you just execute the continuation in a straightforward
> softirq/tasklet context, you don't need to mess with schedule_tail at all...
> Right?
Aah, you mean something like the attached patch?
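For illustration, a caller of the reworked interface would look roughly like
this (sketch only: my_helper, more_cpus_to_visit, next_target and first_target
are made-up names; the real callers are in the microcode.c and
platform_hypercall.c hunks below). The first call passes hdl == NULL, so
continue_hypercall_on_cpu() allocates the migrate_info handle; the helper
receives that handle and hands it back whenever it chains the continuation to
another cpu:

static long my_helper(void *hdl, void *data)
{
    /* ... per-cpu work on the cpu we were continued on ... */

    if ( more_cpus_to_visit )
        return continue_hypercall_on_cpu(next_target, hdl, my_helper, data);

    return 0;
}

/* Initial call from hypercall context (no handle allocated yet): */
ret = continue_hypercall_on_cpu(first_target, NULL, my_helper, data);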
Juergen
--
Juergen Gross                 Principal Developer Operating Systems
TSP ES&S SWE OS6              Telephone: +49 (0) 89 3222 2967
Fujitsu Technology Solutions  e-mail: juergen.gross@xxxxxxxxxxxxxx
Domagkstr. 28                 Internet: ts.fujitsu.com
D-80807 Muenchen              Company details: ts.fujitsu.com/imprint.html
Signed-off-by: Juergen Gross <juergen.gross@xxxxxxxxxxxxxx>
# HG changeset patch
# User juergen.gross@xxxxxxxxxxxxxx
# Date 1271227921 -7200
# Node ID bbb8e0f142df0f8a26df90964c1b27414b9839ea
# Parent c02cc832cb2d88c383d33c1ba50c381fae703308
rework of continue_hypercall_on_cpu to use tasklets, make it architecture
independent
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/acpi/power.c
--- a/xen/arch/x86/acpi/power.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/acpi/power.c Wed Apr 14 08:52:01 2010 +0200
@@ -234,7 +234,7 @@
return error;
}
-static long enter_state_helper(void *data)
+static long enter_state_helper(void *hdl, void *data)
{
struct acpi_sleep_info *sinfo = (struct acpi_sleep_info *)data;
return enter_state(sinfo->sleep_state);
@@ -265,7 +265,7 @@
acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
acpi_sinfo.sleep_state = sleep->sleep_state;
- return continue_hypercall_on_cpu(0, enter_state_helper, &acpi_sinfo);
+ return continue_hypercall_on_cpu(0, NULL, enter_state_helper, &acpi_sinfo);
}
static int acpi_get_wake_status(void)
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/domain.c
--- a/xen/arch/x86/domain.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/domain.c Wed Apr 14 08:52:01 2010 +0200
@@ -1517,82 +1517,6 @@
flush_tlb_mask(&v->vcpu_dirty_cpumask);
}
-struct migrate_info {
- long (*func)(void *data);
- void *data;
- void (*saved_schedule_tail)(struct vcpu *);
- cpumask_t saved_affinity;
- unsigned int nest;
-};
-
-static void continue_hypercall_on_cpu_helper(struct vcpu *v)
-{
- struct cpu_user_regs *regs = guest_cpu_user_regs();
- struct migrate_info *info = v->arch.continue_info;
- cpumask_t mask = info->saved_affinity;
- void (*saved_schedule_tail)(struct vcpu *) = info->saved_schedule_tail;
-
- regs->eax = info->func(info->data);
-
- if ( info->nest-- == 0 )
- {
- xfree(info);
- v->arch.schedule_tail = saved_schedule_tail;
- v->arch.continue_info = NULL;
- vcpu_unlock_affinity(v, &mask);
- }
-
- (*saved_schedule_tail)(v);
-}
-
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data)
-{
- struct vcpu *v = current;
- struct migrate_info *info;
- cpumask_t mask = cpumask_of_cpu(cpu);
- int rc;
-
- if ( cpu == smp_processor_id() )
- return func(data);
-
- info = v->arch.continue_info;
- if ( info == NULL )
- {
- info = xmalloc(struct migrate_info);
- if ( info == NULL )
- return -ENOMEM;
-
- rc = vcpu_lock_affinity(v, &mask);
- if ( rc )
- {
- xfree(info);
- return rc;
- }
-
- info->saved_schedule_tail = v->arch.schedule_tail;
- info->saved_affinity = mask;
- info->nest = 0;
-
- v->arch.schedule_tail = continue_hypercall_on_cpu_helper;
- v->arch.continue_info = info;
- }
- else
- {
- BUG_ON(info->nest != 0);
- rc = vcpu_locked_change_affinity(v, &mask);
- if ( rc )
- return rc;
- info->nest++;
- }
-
- info->func = func;
- info->data = data;
-
- /* Dummy return value will be overwritten by new schedule_tail. */
- BUG_ON(!test_bit(SCHEDULE_SOFTIRQ, &softirq_pending(smp_processor_id())));
- return 0;
-}
-
#define next_arg(fmt, args) ({ \
unsigned long __arg; \
switch ( *(fmt)++ ) \
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/microcode.c
--- a/xen/arch/x86/microcode.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/microcode.c Wed Apr 14 08:52:01 2010 +0200
@@ -28,6 +28,7 @@
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/spinlock.h>
+#include <xen/domain.h>
#include <xen/guest_access.h>
#include <asm/current.h>
@@ -116,7 +117,7 @@
return err;
}
-static long do_microcode_update(void *_info)
+static long do_microcode_update(void *hdl, void *_info)
{
struct microcode_info *info = _info;
int error;
@@ -129,7 +130,8 @@
info->cpu = next_cpu(info->cpu, cpu_online_map);
if ( info->cpu < NR_CPUS )
- return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+ return continue_hypercall_on_cpu(info->cpu, hdl,
+ do_microcode_update, info);
error = info->error;
xfree(info);
@@ -162,5 +164,6 @@
info->error = 0;
info->cpu = first_cpu(cpu_online_map);
- return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
+ return continue_hypercall_on_cpu(info->cpu, NULL,
+ do_microcode_update, info);
}
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/platform_hypercall.c
--- a/xen/arch/x86/platform_hypercall.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/platform_hypercall.c Wed Apr 14 08:52:01 2010 +0200
@@ -48,12 +48,12 @@
extern int set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
-static long cpu_frequency_change_helper(void *data)
+static long cpu_frequency_change_helper(void *hdl, void *data)
{
return cpu_frequency_change(this_cpu(freq));
}
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
{
int cpu = (unsigned long)data;
return cpu_down(cpu);
@@ -314,7 +314,7 @@
if ( op->u.change_freq.flags || !cpu_online(op->u.change_freq.cpu) )
break;
per_cpu(freq, op->u.change_freq.cpu) = op->u.change_freq.freq;
- ret = continue_hypercall_on_cpu(op->u.change_freq.cpu,
+ ret = continue_hypercall_on_cpu(op->u.change_freq.cpu, NULL,
cpu_frequency_change_helper,
NULL);
break;
@@ -467,7 +467,7 @@
break;
}
ret = continue_hypercall_on_cpu(
- 0, cpu_down_helper, (void *)(unsigned long)cpu);
+ 0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
break;
}
break;
diff -r c02cc832cb2d -r bbb8e0f142df xen/arch/x86/sysctl.c
--- a/xen/arch/x86/sysctl.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/arch/x86/sysctl.c Wed Apr 14 08:52:01 2010 +0200
@@ -14,6 +14,7 @@
#include <public/sysctl.h>
#include <xen/sched.h>
#include <xen/event.h>
+#include <xen/domain.h>
#include <xen/domain_page.h>
#include <asm/msr.h>
#include <xen/trace.h>
@@ -29,7 +30,7 @@
#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0)
-static long cpu_down_helper(void *data)
+static long cpu_down_helper(void *hdl, void *data)
{
int cpu = (unsigned long)data;
return cpu_down(cpu);
@@ -186,7 +187,7 @@
break;
case XEN_SYSCTL_CPU_HOTPLUG_OFFLINE:
ret = continue_hypercall_on_cpu(
- 0, cpu_down_helper, (void *)(unsigned long)cpu);
+ 0, NULL, cpu_down_helper, (void *)(unsigned long)cpu);
break;
case XEN_SYSCTL_CPU_HOTPLUG_STATUS:
ret = 0;
diff -r c02cc832cb2d -r bbb8e0f142df xen/common/domain.c
--- a/xen/common/domain.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/domain.c Wed Apr 14 08:52:01 2010 +0200
@@ -898,6 +898,76 @@
return -ENOSYS;
}
+struct migrate_info {
+ long (*func)(void *hdl, void *data);
+ void *data;
+ volatile int nest;
+ struct cpu_user_regs volatile *regs;
+ struct vcpu *v;
+};
+
+static void continue_hypercall_on_cpu_tasklet(struct migrate_info *info)
+{
+ struct vcpu *v = info->v;
+
+ while ( get_return_reg(info->regs) )
+ cpu_relax();
+
+ /* vcpu is paused now, return value can be set */
+ set_return_reg(info->regs, info->func((void *)info, info->data));
+
+ if ( info->nest-- == 0 )
+ {
+ xfree(info);
+ vcpu_unpause(v);
+ }
+
+ return;
+}
+
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+ long (*func)(void *hdl, void *data), void *data)
+{
+ struct vcpu *v = current;
+ struct migrate_info *info = (struct migrate_info *)hdl;
+
+ if ( cpu == smp_processor_id() )
+ return func(info, data);
+
+ if ( info == NULL )
+ {
+ info = xmalloc(struct migrate_info);
+ if ( info == NULL )
+ return -ENOMEM;
+
+ info->nest = 0;
+ info->v = v;
+ info->regs = guest_cpu_user_regs();
+ tasklet_init(&v->cont_tasklet,
+ (void(*)(unsigned long))continue_hypercall_on_cpu_tasklet,
+ (unsigned long)info);
+
+ vcpu_pause_nosync(v);
+ }
+ else
+ {
+ v = info->v;
+ BUG_ON(info->nest != 0);
+ info->nest++;
+ }
+
+ info->func = func;
+ info->data = data;
+
+ set_return_reg(info->regs, -EBUSY);
+
+ tasklet_schedule_cpu(&v->cont_tasklet, cpu);
+ raise_softirq(SCHEDULE_SOFTIRQ);
+
+ /* Dummy return value will be overwritten by tasklet. */
+ return 0;
+}
+
/*
* Local variables:
* mode: C
diff -r c02cc832cb2d -r bbb8e0f142df xen/common/schedule.c
--- a/xen/common/schedule.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/schedule.c Wed Apr 14 08:52:01 2010 +0200
@@ -408,26 +408,18 @@
}
}
-static int __vcpu_set_affinity(
- struct vcpu *v, cpumask_t *affinity,
- bool_t old_lock_status, bool_t new_lock_status)
+int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
{
cpumask_t online_affinity, old_affinity;
+
+ if ( v->domain->is_pinned )
+ return -EINVAL;
cpus_and(online_affinity, *affinity, cpu_online_map);
if ( cpus_empty(online_affinity) )
return -EINVAL;
vcpu_schedule_lock_irq(v);
-
- if ( v->affinity_locked != old_lock_status )
- {
- BUG_ON(!v->affinity_locked);
- vcpu_schedule_unlock_irq(v);
- return -EBUSY;
- }
-
- v->affinity_locked = new_lock_status;
old_affinity = v->cpu_affinity;
v->cpu_affinity = *affinity;
@@ -444,36 +436,6 @@
}
return 0;
-}
-
-int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- if ( v->domain->is_pinned )
- return -EINVAL;
- return __vcpu_set_affinity(v, affinity, 0, 0);
-}
-
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- return __vcpu_set_affinity(v, affinity, 0, 1);
-}
-
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- return __vcpu_set_affinity(v, affinity, 1, 1);
-}
-
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity)
-{
- cpumask_t online_affinity;
-
- /* Do not fail if no CPU in old affinity mask is online. */
- cpus_and(online_affinity, *affinity, cpu_online_map);
- if ( cpus_empty(online_affinity) )
- *affinity = cpu_online_map;
-
- if ( __vcpu_set_affinity(v, affinity, 1, 0) != 0 )
- BUG();
}
/* Block the currently-executing domain until a pertinent event occurs. */
diff -r c02cc832cb2d -r bbb8e0f142df xen/common/softirq.c
--- a/xen/common/softirq.c Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/common/softirq.c Wed Apr 14 08:52:01 2010 +0200
@@ -88,9 +88,11 @@
}
static LIST_HEAD(tasklet_list);
+static DEFINE_PER_CPU(struct list_head, tasklet_list_pcpu);
static DEFINE_SPINLOCK(tasklet_lock);
-void tasklet_schedule(struct tasklet *t)
+static void tasklet_schedule_list(struct tasklet *t, struct list_head *tlist,
+ int cpu)
{
unsigned long flags;
@@ -101,28 +103,47 @@
if ( !t->is_scheduled && !t->is_running )
{
BUG_ON(!list_empty(&t->list));
- list_add_tail(&t->list, &tasklet_list);
+ list_add_tail(&t->list, tlist);
+ t->scheduled_on = NR_CPUS;
}
t->is_scheduled = 1;
- raise_softirq(TASKLET_SOFTIRQ);
+ if ( cpu == smp_processor_id() )
+ raise_softirq(TASKLET_SOFTIRQ);
+ else if ( !t->is_running )
+ cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ else
+ t->scheduled_on = cpu;
}
spin_unlock_irqrestore(&tasklet_lock, flags);
}
+void tasklet_schedule(struct tasklet *t)
+{
+ tasklet_schedule_list(t, &tasklet_list, smp_processor_id());
+}
+
+void tasklet_schedule_cpu(struct tasklet *t, int cpu)
+{
+ tasklet_schedule_list(t, &per_cpu(tasklet_list_pcpu, cpu), cpu);
+}
+
static void tasklet_action(void)
{
+ struct list_head *tlist;
struct tasklet *t;
spin_lock_irq(&tasklet_lock);
- if ( list_empty(&tasklet_list) )
+ tlist = ( list_empty(&this_cpu(tasklet_list_pcpu)) ) ? &tasklet_list :
+ &this_cpu(tasklet_list_pcpu);
+ if ( list_empty(tlist) )
{
spin_unlock_irq(&tasklet_lock);
return;
}
- t = list_entry(tasklet_list.next, struct tasklet, list);
+ t = list_entry(tlist->next, struct tasklet, list);
list_del_init(&t->list);
BUG_ON(t->is_dead || t->is_running || !t->is_scheduled);
@@ -138,14 +159,23 @@
if ( t->is_scheduled )
{
BUG_ON(t->is_dead || !list_empty(&t->list));
- list_add_tail(&t->list, &tasklet_list);
+ if ( t->scheduled_on >= NR_CPUS )
+ list_add_tail(&t->list, tlist);
+ else
+ {
+ unsigned int cpu = t->scheduled_on;
+
+ list_add_tail(&t->list, &per_cpu(tasklet_list_pcpu, cpu));
+ cpu_raise_softirq(cpu, TASKLET_SOFTIRQ);
+ }
}
/*
* If there is more work to do then reschedule. We don't grab more work
* immediately as we want to allow other softirq work to happen first.
*/
- if ( !list_empty(&tasklet_list) )
+ if ( !list_empty(&tasklet_list) ||
+ !list_empty(&this_cpu(tasklet_list_pcpu)) )
raise_softirq(TASKLET_SOFTIRQ);
spin_unlock_irq(&tasklet_lock);
@@ -186,6 +216,12 @@
void __init softirq_init(void)
{
+ int i;
+
+ for_each_possible_cpu ( i )
+ {
+ INIT_LIST_HEAD(&per_cpu(tasklet_list_pcpu, i));
+ }
open_softirq(TASKLET_SOFTIRQ, tasklet_action);
}
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/asm-ia64/regs.h
--- a/xen/include/asm-ia64/regs.h Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-ia64/regs.h Wed Apr 14 08:52:01 2010 +0200
@@ -1,1 +1,4 @@
#include <asm/ptrace.h>
+
+#define get_return_reg(r) r->r8
+#define set_return_reg(r, val) r->r8 = val
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/asm-x86/domain.h
--- a/xen/include/asm-x86/domain.h Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/domain.h Wed Apr 14 08:52:01 2010 +0200
@@ -451,9 +451,6 @@
#define hvm_vmx hvm_vcpu.u.vmx
#define hvm_svm hvm_vcpu.u.svm
-/* Continue the current hypercall via func(data) on specified cpu. */
-int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
-
void vcpu_show_execution_state(struct vcpu *);
void vcpu_show_registers(const struct vcpu *);
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/asm-x86/regs.h
--- a/xen/include/asm-x86/regs.h Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/asm-x86/regs.h Wed Apr 14 08:52:01 2010 +0200
@@ -19,4 +19,7 @@
(diff == 0); \
})
+#define get_return_reg(r) r->eax
+#define set_return_reg(r, val) r->eax = val
+
#endif /* __X86_REGS_H__ */
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/xen/domain.h
--- a/xen/include/xen/domain.h Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/domain.h Wed Apr 14 08:52:01 2010 +0200
@@ -62,6 +62,10 @@
bool_t domctl_lock_acquire(void);
void domctl_lock_release(void);
+/* Continue the current hypercall via func(data) on specified cpu. */
+int continue_hypercall_on_cpu(int cpu, void *hdl,
+ long (*func)(void *hdl, void *data), void *data);
+
extern unsigned int xen_processor_pmbits;
#endif /* __XEN_DOMAIN_H__ */
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/xen/sched.h
--- a/xen/include/xen/sched.h Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/sched.h Wed Apr 14 08:52:01 2010 +0200
@@ -132,8 +132,6 @@
bool_t defer_shutdown;
/* VCPU is paused following shutdown request (d->is_shutting_down)? */
bool_t paused_for_shutdown;
- /* VCPU affinity is temporarily locked from controller changes? */
- bool_t affinity_locked;
/*
* > 0: a single port is being polled;
@@ -156,6 +154,9 @@
/* Bitmask of CPUs which are holding onto this VCPU's state. */
cpumask_t vcpu_dirty_cpumask;
+
+ /* tasklet for continue_hypercall_on_cpu() */
+ struct tasklet cont_tasklet;
struct arch_vcpu arch;
};
@@ -581,9 +582,6 @@
void vcpu_force_reschedule(struct vcpu *v);
void cpu_disable_scheduler(void);
int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_lock_affinity(struct vcpu *v, cpumask_t *affinity);
-int vcpu_locked_change_affinity(struct vcpu *v, cpumask_t *affinity);
-void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);
diff -r c02cc832cb2d -r bbb8e0f142df xen/include/xen/softirq.h
--- a/xen/include/xen/softirq.h Tue Apr 13 18:19:33 2010 +0100
+++ b/xen/include/xen/softirq.h Wed Apr 14 08:52:01 2010 +0200
@@ -50,14 +50,17 @@
bool_t is_scheduled;
bool_t is_running;
bool_t is_dead;
+ unsigned int scheduled_on;
void (*func)(unsigned long);
unsigned long data;
};
#define DECLARE_TASKLET(name, func, data) \
- struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, func, data }
+ struct tasklet name = { LIST_HEAD_INIT(name.list), 0, 0, 0, NR_CPUS, \
+ func, data }
void tasklet_schedule(struct tasklet *t);
+void tasklet_schedule_cpu(struct tasklet *t, int cpu);
void tasklet_kill(struct tasklet *t);
void tasklet_init(
struct tasklet *t, void (*func)(unsigned long), unsigned long data);