xen-devel
Re: [Xen-devel] [PATCH 5/7] xen: Make event channel work with PV extensi
To: |
Sheng Yang <sheng@xxxxxxxxxxxxxxx> |
Subject: |
Re: [Xen-devel] [PATCH 5/7] xen: Make event channel work with PV extension of HVM |
From: |
Jeremy Fitzhardinge <jeremy@xxxxxxxx> |
Date: |
Mon, 01 Mar 2010 17:38:21 -0800 |
Cc: |
xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxx>, Ian Campbell <Ian.Campbell@xxxxxxxxxx>, Stefano Stabellini <stefano.stabellini@xxxxxxxxxxxxx>, linux-kernel@xxxxxxxxxxxxxxx, Ian Pratt <Ian.Pratt@xxxxxxxxxxxxx>, Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>, Keir Fraser <keir.fraser@xxxxxxxxxxxxx> |
Delivery-date: |
Mon, 01 Mar 2010 17:39:42 -0800 |
Envelope-to: |
www-data@xxxxxxxxxxxxxxxxxxx |
In-reply-to: |
<1267436315-24486-6-git-send-email-sheng@xxxxxxxxxxxxxxx> |
List-help: |
<mailto:xen-devel-request@lists.xensource.com?subject=help> |
List-id: |
Xen developer discussion <xen-devel.lists.xensource.com> |
List-post: |
<mailto:xen-devel@lists.xensource.com> |
List-subscribe: |
<http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe> |
List-unsubscribe: |
<http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe> |
References: |
<1267436315-24486-1-git-send-email-sheng@xxxxxxxxxxxxxxx> <1267436315-24486-6-git-send-email-sheng@xxxxxxxxxxxxxxx> |
Sender: |
xen-devel-bounces@xxxxxxxxxxxxxxxxxxx |
User-agent: |
Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.7) Gecko/20100120 Fedora/3.0.1-1.fc12 Lightning/1.0b2pre Thunderbird/3.0.1 |
On 03/01/2010 01:38 AM, Sheng Yang wrote:
We mapped each IOAPIC pin to a VIRQ, so that we can deliver interrupt through
these VIRQs.
We used X86_PLATFORM_IPI_VECTOR as the noficiation vector for hypervisor
to notify guest about the event.
I don't understand this aspect of the patch. Can you explain the
interrupt delivery path when HVM event channels are enabled?
The Xen PV timer is used to provide guest a reliable timer.
The patch also enabled SMP support, then we can support IPI through evtchn as
well.
Then we don't use IOAPIC/LAPIC.
Signed-off-by: Sheng Yang<sheng@xxxxxxxxxxxxxxx>
---
arch/x86/xen/enlighten.c | 72 +++++++++++++++++++++
arch/x86/xen/irq.c | 37 ++++++++++-
arch/x86/xen/smp.c | 144 ++++++++++++++++++++++++++++++++++++++++++-
arch/x86/xen/xen-ops.h | 3 +
drivers/xen/events.c | 66 ++++++++++++++++++-
include/xen/events.h | 1 +
include/xen/hvm.h | 5 ++
include/xen/interface/xen.h | 6 ++-
8 files changed, 326 insertions(+), 8 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index fdb9664..f617639 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -58,6 +58,9 @@
#include<asm/reboot.h>
#include<asm/stackprotector.h>
+#include<xen/hvm.h>
+#include<xen/events.h>
+
#include "xen-ops.h"
#include "mmu.h"
#include "multicalls.h"
@@ -1212,6 +1215,8 @@ static void __init xen_hvm_pv_banner(void)
pv_info.name);
printk(KERN_INFO "Xen version: %d.%d%s\n",
version>> 16, version& 0xffff, extra.extraversion);
+ if (xen_hvm_pv_evtchn_enabled())
+ printk(KERN_INFO "PV feature: Event channel enabled\n");
}
static int xen_para_available(void)
@@ -1256,6 +1261,9 @@ static int init_hvm_pv_info(void)
xen_hvm_pv_status = XEN_HVM_PV_ENABLED;
+ if (edx& XEN_CPUID_FEAT2_HVM_PV_EVTCHN)
+ xen_hvm_pv_status |= XEN_HVM_PV_EVTCHN_ENABLED;
+
/* We only support 1 page of hypercall for now */
if (pages != 1)
return -ENOMEM;
@@ -1292,12 +1300,42 @@ static void __init init_shared_info(void)
per_cpu(xen_vcpu, 0) =&HYPERVISOR_shared_info->vcpu_info[0];
}
+static int set_callback_via(uint64_t via)
+{
+ struct xen_hvm_param a;
+
+ a.domid = DOMID_SELF;
+ a.index = HVM_PARAM_CALLBACK_IRQ;
+ a.value = via;
+ return HYPERVISOR_hvm_op(HVMOP_set_param,&a);
+}
+
+void do_hvm_pv_evtchn_intr(void)
+{
+#ifdef CONFIG_X86_64
+ per_cpu(irq_count, smp_processor_id())++;
+#endif
+ xen_evtchn_do_upcall(get_irq_regs());
+#ifdef CONFIG_X86_64
+ per_cpu(irq_count, smp_processor_id())--;
+#endif
+}
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static void xen_hvm_pv_evtchn_apic_write(u32 reg, u32 val)
+{
+ /* The only one reached here should be EOI */
+ WARN_ON(reg != APIC_EOI);
+}
+#endif
+
void __init xen_guest_init(void)
{
#ifdef CONFIG_X86_32
return;
#else
int r;
+ uint64_t callback_via;
/* Ensure the we won't confused with PV */
if (xen_domain_type == XEN_PV_DOMAIN)
@@ -1310,6 +1348,40 @@ void __init xen_guest_init(void)
init_shared_info();
xen_hvm_pv_init_irq_ops();
+
+ if (xen_hvm_pv_evtchn_enabled()) {
+ if (enable_hvm_pv(HVM_PV_EVTCHN))
If this fails, shouldn't it also clear XEN_HVM_PV_EVTCHN_ENABLED?
+ return;
+
+ pv_time_ops = xen_time_ops;
+
+ x86_init.timers.timer_init = xen_time_init;
+ x86_init.timers.setup_percpu_clockev = x86_init_noop;
+ x86_cpuinit.setup_percpu_clockev = x86_init_noop;
Presumably even if we don't have PV_EVTCHN available/enabled, the Xen
clocksource would be available for getting time?
+
+ x86_platform.calibrate_tsc = xen_tsc_khz;
+ x86_platform.get_wallclock = xen_get_wallclock;
+ x86_platform.set_wallclock = xen_set_wallclock;
+
+ pv_apic_ops = xen_apic_ops;
+#ifdef CONFIG_X86_LOCAL_APIC
+ /*
+ * set up the basic apic ops.
+ */
+ set_xen_basic_apic_ops();
+ apic->write = xen_hvm_pv_evtchn_apic_write;
I'd just change the xen_apic_write to remove the WARN_ON, since you
don't seem to care about it either.
+#endif
+
+ callback_via = HVM_CALLBACK_VECTOR(X86_PLATFORM_IPI_VECTOR);
+ set_callback_via(callback_via);
+
+ x86_platform_ipi_callback = do_hvm_pv_evtchn_intr;
+
+ disable_acpi();
+
+ xen_hvm_pv_smp_init();
+ machine_ops = xen_machine_ops;
+ }
#endif
}
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index fadaa97..7827a6d 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
#include<xen/interface/xen.h>
#include<xen/interface/sched.h>
#include<xen/interface/vcpu.h>
+#include<xen/xen.h>
#include<asm/xen/hypercall.h>
#include<asm/xen/hypervisor.h>
@@ -132,6 +133,20 @@ void __init xen_init_irq_ops()
x86_init.irqs.intr_init = xen_init_IRQ;
}
+static void xen_hvm_pv_evtchn_disable(void)
+{
+ native_irq_disable();
+ xen_irq_disable();
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_hvm_pv_evtchn_disable);
+
+static void xen_hvm_pv_evtchn_enable(void)
+{
+ native_irq_enable();
+ xen_irq_enable();
+}
+PV_CALLEE_SAVE_REGS_THUNK(xen_hvm_pv_evtchn_enable);
+
static void xen_hvm_pv_safe_halt(void)
{
/* Do local_irq_enable() explicitly in hvm_pv guest */
@@ -147,8 +162,26 @@ static void xen_hvm_pv_halt(void)
xen_hvm_pv_safe_halt();
}
+static const struct pv_irq_ops xen_hvm_pv_irq_ops __initdata = {
+ .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
+ .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
+ .irq_disable = PV_CALLEE_SAVE(xen_hvm_pv_evtchn_disable),
+ .irq_enable = PV_CALLEE_SAVE(xen_hvm_pv_evtchn_enable),
+
+ .safe_halt = xen_hvm_pv_safe_halt,
+ .halt = xen_hvm_pv_halt,
+#ifdef CONFIG_X86_64
+ .adjust_exception_frame = paravirt_nop,
+#endif
+};
+
void __init xen_hvm_pv_init_irq_ops(void)
{
- pv_irq_ops.safe_halt = xen_hvm_pv_safe_halt;
- pv_irq_ops.halt = xen_hvm_pv_halt;
+ if (xen_hvm_pv_evtchn_enabled()) {
+ pv_irq_ops = xen_hvm_pv_irq_ops;
+ x86_init.irqs.intr_init = xen_hvm_pv_evtchn_init_IRQ;
+ } else {
+ pv_irq_ops.safe_halt = xen_hvm_pv_safe_halt;
+ pv_irq_ops.halt = xen_hvm_pv_halt;
+ }
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 563d205..a85d0b6 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -15,20 +15,26 @@
#include<linux/sched.h>
#include<linux/err.h>
#include<linux/smp.h>
+#include<linux/nmi.h>
#include<asm/paravirt.h>
#include<asm/desc.h>
#include<asm/pgtable.h>
#include<asm/cpu.h>
+#include<asm/trampoline.h>
+#include<asm/tlbflush.h>
+#include<asm/mtrr.h>
#include<xen/interface/xen.h>
#include<xen/interface/vcpu.h>
#include<asm/xen/interface.h>
#include<asm/xen/hypercall.h>
+#include<asm/xen/hypervisor.h>
#include<xen/page.h>
#include<xen/events.h>
+#include<xen/xen.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -171,7 +177,8 @@ static void __init xen_smp_prepare_boot_cpu(void)
/* We've switched to the "real" per-cpu gdt, so make sure the
old memory can be recycled */
- make_lowmem_page_readwrite(xen_initial_gdt);
+ if (xen_pv_domain())
+ make_lowmem_page_readwrite(xen_initial_gdt);
Test for xen_feature(XENFEAT_writable_descriptor_tables).
xen_setup_vcpu_info_placement();
}
@@ -480,3 +487,138 @@ void __init xen_smp_init(void)
xen_fill_possible_map();
xen_init_spinlocks();
}
+
+static __cpuinit void xen_hvm_pv_start_secondary(void)
+{
+ int cpu = smp_processor_id();
+
+ cpu_init();
+ touch_nmi_watchdog();
+ preempt_disable();
+
+ /* otherwise gcc will move up smp_processor_id before the cpu_init */
+ barrier();
+ /*
+ * Check TSC synchronization with the BSP:
+ */
+ check_tsc_sync_target();
+
+ /* Done in smp_callin(), move it here */
+ set_mtrr_aps_delayed_init();
+ smp_store_cpu_info(cpu);
+
+ /* This must be done before setting cpu_online_mask */
+ set_cpu_sibling_map(cpu);
+ wmb();
+
+ set_cpu_online(smp_processor_id(), true);
+ per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+
+ /* enable local interrupts */
+ local_irq_enable();
+
+ xen_setup_cpu_clockevents();
How much of this is necessary? At this point, isn't CPU bringup the
same as PV?
+
+ wmb();
+ cpu_idle();
+}
+
+static __cpuinit int
+hvm_pv_cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
+{
+ struct vcpu_guest_context *ctxt;
+ unsigned long start_ip;
+
+ if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
+ return 0;
+
+ ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
+ if (ctxt == NULL)
+ return -ENOMEM;
+
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ initial_code = (unsigned long)xen_hvm_pv_start_secondary;
+ stack_start.sp = (void *) idle->thread.sp;
+
+ /* start_ip had better be page-aligned! */
+ start_ip = setup_trampoline();
+
+ /* only start_ip is what we want */
+ ctxt->flags = VGCF_HVM_GUEST;
+ ctxt->user_regs.eip = start_ip;
+
+ printk(KERN_INFO "Booting processor %d ip 0x%lx\n", cpu, start_ip);
+
+ if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
+ BUG();
+
+ kfree(ctxt);
+ return 0;
+}
+
+static int __init xen_hvm_pv_cpu_up(unsigned int cpu)
+{
+ struct task_struct *idle = idle_task(cpu);
+ int rc;
+ unsigned long flags;
+
+ per_cpu(current_task, cpu) = idle;
+
+#ifdef CONFIG_X86_32
+ irq_ctx_init(cpu);
+#else
+ clear_tsk_thread_flag(idle, TIF_FORK);
+ initial_gs = per_cpu_offset(cpu);
+ per_cpu(kernel_stack, cpu) =
+ (unsigned long)task_stack_page(idle) -
+ KERNEL_STACK_OFFSET + THREAD_SIZE;
+#endif
+
+ xen_setup_timer(cpu);
+ xen_init_lock_cpu(cpu);
+
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
Can you put all this duplicated code into a common function?
+
+ rc = hvm_pv_cpu_initialize_context(cpu, idle);
+ if (rc)
+ return rc;
+
+ if (num_online_cpus() == 1)
+ alternatives_smp_switch(1);
+
+ rc = xen_smp_intr_init(cpu);
+ if (rc)
+ return rc;
+
+ rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
+ BUG_ON(rc);
+
+ /*
+ * Check TSC synchronization with the AP (keep irqs disabled
+ * while doing so):
+ */
+ local_irq_save(flags);
+ check_tsc_sync_source(cpu);
+ local_irq_restore(flags);
+
+ while (!cpu_online(cpu)) {
+ cpu_relax();
+ touch_nmi_watchdog();
+ }
+
+ return 0;
+}
+
+static void xen_hvm_pv_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm, unsigned long va)
+{
+ /* TODO Make it more specific */
+ flush_tlb_all();
Is the MMUEXT_TLB_FLUSH/INVLPG_MULTI hypercall not currently available
to HVM?
+}
+
+void __init xen_hvm_pv_smp_init(void)
+{
It's probably safest to have this do its own test for
xen_hvm_pv_evtchn_enabled(), rather than assuming the caller is getting
it right.
+ smp_ops = xen_smp_ops;
+ smp_ops.cpu_up = xen_hvm_pv_cpu_up;
+ pv_mmu_ops.flush_tlb_others = xen_hvm_pv_flush_tlb_others;
+}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index cc00760..a8cc129 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -34,6 +34,7 @@ void xen_reserve_top(void);
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
void __init xen_init_IRQ(void);
+void __init xen_hvm_pv_evtchn_init_IRQ(void);
void xen_enable_sysenter(void);
void xen_enable_syscall(void);
void xen_vcpu_restore(void);
@@ -61,10 +62,12 @@ void xen_setup_vcpu_info_placement(void);
#ifdef CONFIG_SMP
void xen_smp_init(void);
+void xen_hvm_pv_smp_init(void);
extern cpumask_var_t xen_cpu_initialized_map;
#else
static inline void xen_smp_init(void) {}
+static inline void xen_hvm_pv_smp_init(void) {}
#endif
#ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index ce602dd..e1835db 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -37,9 +37,12 @@
#include<xen/xen-ops.h>
#include<xen/events.h>
+#include<xen/xen.h>
#include<xen/interface/xen.h>
#include<xen/interface/event_channel.h>
+#include<asm/desc.h>
+
/*
* This lock protects updates to the following mapping and reference-count
* arrays. The lock does not need to be acquired to read the mapping tables.
@@ -624,8 +627,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
unsigned count;
- exit_idle();
- irq_enter();
+ /*
+ * If is PV featured HVM, these have already been done
+ */
+ if (likely(!xen_hvm_pv_evtchn_enabled())) {
+ exit_idle();
+ irq_enter();
+ }
In that case, rather than putting this conditional in the hot path, make
an inner __xen_evtchn_do_upcall which is wrapped by the PV and HVM
variants which do the appropriate things. (And drop the pt_regs arg, I
think.)
do {
unsigned long pending_words;
@@ -662,8 +670,10 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
} while(count != 1);
out:
- irq_exit();
- set_irq_regs(old_regs);
+ if (likely(!xen_hvm_pv_evtchn_enabled())) {
+ irq_exit();
+ set_irq_regs(old_regs);
+ }
put_cpu();
}
@@ -944,3 +954,51 @@ void __init xen_init_IRQ(void)
irq_ctx_init(smp_processor_id());
}
+
+void __init xen_hvm_pv_evtchn_init_IRQ(void)
+{
+ int i;
+
+ xen_init_IRQ();
+ for (i = 0; i< NR_IRQS_LEGACY; i++) {
+ struct evtchn_bind_virq bind_virq;
+ struct irq_desc *desc = irq_to_desc(i);
+ int virq, evtchn;
+
+ virq = i + VIRQ_EMUL_PIN_START;
+ bind_virq.virq = virq;
+ bind_virq.vcpu = 0;
+
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+ &bind_virq) != 0)
+ BUG();
+
+ evtchn = bind_virq.port;
+ evtchn_to_irq[evtchn] = i;
+ irq_info[i] = mk_virq_info(evtchn, virq);
+
+ desc->status = IRQ_DISABLED;
+ desc->action = NULL;
+ desc->depth = 1;
+
+ /*
+ * 16 old-style INTA-cycle interrupts:
+ */
+ set_irq_chip_and_handler_name(i,&xen_dynamic_chip,
+ handle_level_irq, "event");
+ }
+
+ /*
+ * Cover the whole vector space, no vector can escape
+ * us. (some of these will be overridden and become
+ * 'special' SMP interrupts)
+ */
+ for (i = 0; i< (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
+ int vector = FIRST_EXTERNAL_VECTOR + i;
+ if (vector != IA32_SYSCALL_VECTOR)
+ set_intr_gate(vector, interrupt[i]);
+ }
+
+ /* generic IPI for platform specific use, now used for HVM evtchn */
+ alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi);
+}
diff --git a/include/xen/events.h b/include/xen/events.h
index e68d59a..91755db 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -56,4 +56,5 @@ void xen_poll_irq(int irq);
/* Determine the IRQ which is bound to an event channel */
unsigned irq_from_evtchn(unsigned int evtchn);
+void xen_evtchn_do_upcall(struct pt_regs *regs);
#endif /* _XEN_EVENTS_H */
diff --git a/include/xen/hvm.h b/include/xen/hvm.h
index 4ea8887..c66d788 100644
--- a/include/xen/hvm.h
+++ b/include/xen/hvm.h
@@ -20,4 +20,9 @@ static inline unsigned long hvm_get_parameter(int idx)
return xhv.value;
}
+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
+ HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
+
#endif /* XEN_HVM_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 2befa3e..9282ff7 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -90,7 +90,11 @@
#define VIRQ_ARCH_6 22
#define VIRQ_ARCH_7 23
-#define NR_VIRQS 24
+#define VIRQ_EMUL_PIN_START 24
+#define VIRQ_EMUL_PIN_NUM 16
+
+#define NR_VIRQS 40
(VIRQ_EMUL_PIN_START + VIRQ_EMUL_PIN_NUM)?
J
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|
|
|