I implemented sysenter for 32-on-64, since it seemed straightforward
enough. It mostly works, but every now and again I get vcpus just
hanging in a blocked state, as if events are being lost or ignored. It's
very similar to the symptoms that other people have reported against the
pvops kernel, which I have not managed to reproduce. Perhaps using
sysenter is exacerbating an existing bug...
Anyway, a couple of questions. It seems that the stack frame Xen's
sysenter generates is not exactly the same as the one the kernel
expects, so the direct access to the thread_info structure (the
TI_sysenter_return load) doesn't work properly. What's the difference
in the frames? (For reference, the native path at sysenter_past_esp
builds, top down: eip = sysenter_return from thread_info, cs =
__USER_CS, eflags, esp = the user stack pointer from %ebp, ss =
__USER_DS; my fixup below rewrites cs/ss/esp in place and drops the
top word, but that's inferred by experiment, not from a documented ABI.)
I guess the other reason for the separate PV Xen sysenter entrypoint is
to deal with sysexit not working. I addressed this by implementing a
sysexit pvop using iret, though I think I could just set the TIF_IRET
flag in thread_info.
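For concreteness, the TIF_IRET variant would be little more than this
(a sketch only - whether flagging the task at sysenter entry actually
covers every exit path is exactly what I haven't checked, and
xen_force_iret is a made-up name):

    #include <linux/thread_info.h>

    /* Hypothetical alternative to a sysexit pvop: flag the task so
       the syscall exit path restores user context with iret (the
       same trick vm86 relies on) rather than sysexit. */
    static inline void xen_force_iret(void)
    {
    	set_thread_flag(TIF_IRET);
    }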
Anyway, could you look at these changes and see if anything problematic
leaps out?
I'm also having trouble debugging it, since xenctx and gdbserver-xen
don't work on 32-bit compat domains, and the console itself seems to
be locked up.
I'm not sure how I can get any state out of the vcpus; even an eip would
help.
Thanks,
J
Subject: xen: add support for callbackops hypercall
Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>
---
include/asm-x86/xen/hypercall.h | 6 ++
include/asm-x86/xen/interface.h | 4 +
include/xen/interface/callback.h | 102 ++++++++++++++++++++++++++++++++++++++
3 files changed, 112 insertions(+)
===================================================================
--- a/include/asm-x86/xen/hypercall.h
+++ b/include/asm-x86/xen/hypercall.h
@@ -161,6 +161,12 @@
return _hypercall4(int, set_callbacks,
event_selector, event_address,
failsafe_selector, failsafe_address);
}
+
+static inline int
+HYPERVISOR_callback_op(int cmd, void *arg)
+{
+	return _hypercall2(int, callback_op, cmd, arg);
+}
static inline int
===================================================================
--- a/include/asm-x86/xen/interface.h
+++ b/include/asm-x86/xen/interface.h
@@ -171,6 +171,10 @@
unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */
};
+struct xen_callback {
+ unsigned long cs;
+ unsigned long eip;
+};
#endif /* !__ASSEMBLY__ */
/*
===================================================================
--- /dev/null
+++ b/include/xen/interface/callback.h
@@ -0,0 +1,102 @@
+/******************************************************************************
+ * callback.h
+ *
+ * Register guest OS callbacks with Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_PUBLIC_CALLBACK_H__
+#define __XEN_PUBLIC_CALLBACK_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ * long callback_op(int cmd, void *extra_args)
+ * @cmd == CALLBACKOP_??? (callback operation).
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/* ia64, x86: Callback for event delivery. */
+#define CALLBACKTYPE_event 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
+#define CALLBACKTYPE_failsafe 1
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall 2
+
+/*
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ * feature is enabled. Do not use this callback type in new code.
+ */
+#define CALLBACKTYPE_sysenter_deprecated 3
+
+/* x86: Callback for NMI delivery. */
+#define CALLBACKTYPE_nmi 4
+
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ * [nb. also 64-bit guest applications on Intel CPUs
+ * ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter 5
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ * ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32 7
+
+/*
+ * Disable event delivery during the callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events 0
+#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register 0
+struct callback_register {
+ uint16_t type;
+ uint16_t flags;
+ struct xen_callback address;
+};
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister 1
+struct callback_unregister {
+ uint16_t type;
+ uint16_t _unused;
+};
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
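For what it's worth, the intended usage pattern for the new hypercall
looks like this (a sketch; my_nmi_target is illustrative and not part
of this series - the sysenter registration in the third patch is the
real user):

    #include <xen/interface/callback.h>
    #include <asm/xen/hypercall.h>

    extern void my_nmi_target(void);	/* hypothetical handler */

    static int example_register(void)
    {
    	static struct callback_register cb = {
    		.type = CALLBACKTYPE_nmi,
    		.address = { __KERNEL_CS, (unsigned long)my_nmi_target },
    	};

    	/* A nonzero return means the hypervisor doesn't know this
    	   callback type, so the caller must fall back. */
    	return HYPERVISOR_callback_op(CALLBACKOP_register, &cb);
    }

    static void example_unregister(void)
    {
    	struct callback_unregister un = { .type = CALLBACKTYPE_nmi };

    	/* Not every callback type can be unregistered; Xen returns
    	   -EINVAL for those. */
    	HYPERVISOR_callback_op(CALLBACKOP_unregister, &un);
    }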
Subject: x86: only enable interrupts when kernel state has been set up
The sysenter path tries to enable interrupts immediately. Unfortunately
this doesn't work in a paravirt environment, because not enough kernel
state has been set up at that point (namely, pointing %fs to the kernel
percpu data segment). To fix this, defer ENABLE_INTERRUPTS until after
the kernel state has been set up.
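To make the failure concrete: under Xen, ENABLE_INTERRUPTS ends up in
roughly the following (simplified from the existing xen_irq_enable
pvop in arch/x86/xen/enlighten.c), and the percpu read is what dies
before %fs is loaded:

    /* Simplified sketch of the Xen irq_enable pvop: unmask event
       delivery, then check whether anything arrived while masked. */
    static void xen_irq_enable(void)
    {
    	struct vcpu_info *vcpu;

    	vcpu = x86_read_percpu(xen_vcpu);  /* %fs-relative on 32-bit */
    	vcpu->evtchn_upcall_mask = 0;

    	barrier();  /* unmask before checking for pending events */

    	if (unlikely(vcpu->evtchn_upcall_pending))
    		force_evtchn_callback();
    }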
Unfortunately this means that we're running with interrupts disabled
for a while without calling the IRQ tracing code, but that can't be
called without setting up %fs either.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>
---
arch/x86/kernel/entry_32.S | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
===================================================================
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -291,10 +291,10 @@ ENTRY(ia32_sysenter_target)
movl TSS_sysenter_sp0(%esp),%esp
ENTRY(sysenter_past_esp)
/*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs and here we enable it straight after entry:
+ * Interrupts are disabled here, but we can't trace that with
+ * TRACE_IRQS_OFF until enough kernel state has been set up to call
+ * it - and we enable interrupts again immediately afterwards anyway.
*/
- ENABLE_INTERRUPTS(CLBR_NONE)
pushl $(__USER_DS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ss, 0*/
@@ -330,6 +330,7 @@ 1: movl (%ebp),%ebp
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+ ENABLE_INTERRUPTS(CLBR_NONE)
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not
testb */
@@ -546,6 +547,7 @@ syscall_fault:
pushl %eax # save orig_eax
CFI_ADJUST_CFA_OFFSET 4
SAVE_ALL
+ ENABLE_INTERRUPTS(CLBR_NONE)
GET_THREAD_INFO(%ebp)
movl $-EFAULT,PT_EAX(%esp)
jmp resume_userspace
Subject: xen: support sysenter/sysexit if hypervisor does
64-bit Xen supports sysenter for 32-bit guests, so support its
use. (sysenter is faster than int $0x80 in 32-on-64.)
sysexit is still not supported, so we fake it up using iret.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>
---
arch/x86/kernel/entry_32.S | 22 +++++++++++++++++-
arch/x86/xen/enlighten.c | 3 --
arch/x86/xen/setup.c | 21 +++++++++++++++++
arch/x86/xen/smp.c | 1 +
arch/x86/xen/xen-asm.S | 52 ++++++++++++++++++++++++++++++++++++++++++++
arch/x86/xen/xen-ops.h | 3 ++
6 files changed, 99 insertions(+), 3 deletions(-)
===================================================================
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -311,6 +311,7 @@ ENTRY(sysenter_past_esp)
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
+sysenter_stack_setup:
pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0
@@ -1025,6 +1026,16 @@ ENDPROC(kernel_thread_helper)
ENDPROC(kernel_thread_helper)
#ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+	RING0_INT_FRAME
+	movl $__USER_DS,16(%esp)	/* ss = user data segment */
+	movl %ebp,12(%esp)		/* esp = user stack pointer (in %ebp) */
+	movl $__USER_CS,4(%esp)		/* cs = user code segment */
+	addl $4,%esp	/* discard top word; the normal path pushes the return eip itself */
+ jmp sysenter_stack_setup
+
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
pushl $0
@@ -1044,8 +1055,17 @@ ENTRY(xen_hypervisor_callback)
jae 1f
call xen_iret_crit_fixup
+ jmp 2f
-1: mov %esp, %eax
+1: cmpl $xen_sysexit_start_crit,%eax
+ jb 2f
+ cmpl $xen_sysexit_end_crit,%eax
+ jae 2f
+
+ jmp xen_sysexit_crit_fixup
+
+ENTRY(xen_do_upcall)
+2: mov %esp, %eax
call xen_evtchn_do_upcall
jmp ret_from_intr
CFI_ENDPROC
===================================================================
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -153,7 +153,6 @@ static void xen_cpuid(unsigned int *ax,
if (*ax == 1)
maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */
(1 << X86_FEATURE_ACPI) | /* disable ACPI */
- (1 << X86_FEATURE_SEP) | /* disable SEP */
(1 << X86_FEATURE_ACC)); /* thermal monitoring */
asm(XEN_EMULATE_PREFIX "cpuid"
@@ -969,7 +968,7 @@ static const struct pv_cpu_ops xen_cpu_o
.read_pmc = native_read_pmc,
.iret = xen_iret,
- .irq_enable_syscall_ret = NULL, /* never called */
+ .irq_enable_syscall_ret = xen_sysexit,
.load_tr_desc = paravirt_nop,
.set_ldt = xen_set_ldt,
===================================================================
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
+#include <xen/interface/callback.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
@@ -67,6 +68,24 @@ static void __init fiddle_vdso(void)
*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
}
+void xen_enable_sysenter(void)
+{
+ int cpu = smp_processor_id();
+ extern void xen_sysenter_target(void);
+ /* Mask events on entry, even though they get enabled immediately */
+ static struct callback_register sysenter = {
+ .type = CALLBACKTYPE_sysenter,
+ .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+ .flags = CALLBACKF_mask_events,
+ };
+
+ if (!boot_cpu_has(X86_FEATURE_SEP) ||
+ HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+ clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
+ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
+ }
+}
+
void __init xen_arch_setup(void)
{
struct physdev_set_iopl set_iopl;
@@ -80,6 +99,8 @@ void __init xen_arch_setup(void)
HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
			 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
+
+ xen_enable_sysenter();
set_iopl.iopl = 1;
rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
===================================================================
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_id
int cpu = smp_processor_id();
cpu_init();
+ xen_enable_sysenter();
preempt_disable();
per_cpu(cpu_state, cpu) = CPU_ONLINE;
===================================================================
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -275,6 +275,58 @@ 2: ret
2: ret
+ENTRY(xen_sysexit)
+ /* Store vcpu_info pointer for easy access. Do it this
+ way to avoid having to reload %fs */
+#ifdef CONFIG_SMP
+ GET_THREAD_INFO(%eax)
+ movl TI_cpu(%eax),%eax
+ movl __per_cpu_offset(,%eax,4),%eax
+ mov per_cpu__xen_vcpu(%eax),%eax
+#else
+ movl per_cpu__xen_vcpu, %eax
+#endif
+
+ /* We can't actually use sysexit in a pv guest,
+ so fake it up with iret */
+ pushl $__USER_DS /* user stack segment */
+ pushl %ecx /* user esp */
+ pushl PT_EFLAGS+2*4(%esp) /* user eflags */
+ pushl $__USER_CS /* user code segment */
+ pushl %edx /* user eip */
+
+	/* Unconditionally unmask events and test for pending:
+	   pending is the low byte and mask the high byte of this word,
+	   so the andw clears the mask and leaves ZF set iff no event
+	   is pending */
+	andw $0x00ff, XEN_vcpu_info_pending(%eax)
+
+xen_sysexit_start_crit:
+ /* If there's something pending, mask events again so we
+ can directly inject it back into the kernel. */
+ jnz 1f
+
+ movl PT_EAX+5*4(%esp),%eax
+2: iret
+1: movb $1, XEN_vcpu_info_mask(%eax)
+xen_sysexit_end_crit:
+ addl $5*4, %esp /* remove iret frame */
+ /* no need to re-save regs, but need to restore kernel %fs */
+ mov $__KERNEL_PERCPU, %eax
+ mov %eax, %fs
+ jmp xen_do_upcall
+.section __ex_table,"a"
+ .align 4
+ .long 2b,iret_exc
+.previous
+
+ .globl xen_sysexit_start_crit, xen_sysexit_end_crit
+/*
+ sysexit fixup is easy, since the old frame is still sitting there
+ on the stack. We just need to remove the new recursive
+ interrupt and return.
+ */
+ENTRY(xen_sysexit_crit_fixup)
+ addl $PT_OLDESP+5*4, %esp /* remove frame+iret */
+ jmp xen_do_upcall
+
/*
Force an event check by making a hypercall,
but preserve regs before making the call.
===================================================================
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -19,6 +19,7 @@ char * __init xen_memory_setup(void);
char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
void xen_setup_timer(int cpu);
void xen_setup_cpu_clockevents(void);
@@ -64,4 +65,6 @@ DECL_ASM(void, xen_restore_fl_direct, un
DECL_ASM(void, xen_restore_fl_direct, unsigned long);
void xen_iret(void);
+void xen_sysexit(void);
+
#endif /* XEN_OPS_H */