This patch adds interrupt handling for nested virtualization. It mainly includes:
- virtual interrupt injection when running in nested mode,
- IDT vectoring (idtv) handling in L2,
- interrupt blocking handling in L2.
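As an aside for reviewers, here is a minimal, self-contained sketch of
the two injection cases this patch distinguishes (the types and helpers
below are illustrative stand-ins, not Xen's actual API); the real logic
lives in vmx_nest_intr_intercept() in the diff below:

#include <stdbool.h>
#include <stdio.h>

struct vcpu_model {
    bool in_nesting;      /* vcpu currently executing in L2 context */
    bool rflags_if;       /* guest RFLAGS.IF */
    bool ack_on_exit;     /* L1 set VM_EXIT_ACK_INTR_ON_EXIT */
};

/* Returns true if the interrupt was delivered, false if it must wait. */
static bool inject_virtual_intr(struct vcpu_model *v)
{
    if ( !v->in_nesting )
    {
        /* 1. L1 context: same as without nesting; honour RFLAGS.IF. */
        if ( !v->rflags_if )
            return false;                 /* open an interrupt window */
        printf("inject via VMCS\n");
        return true;
    }

    /* 2. L2 context: RFLAGS.IF is ignored; synthesize a virtual
     * VMExit with reason EXTERNAL_INTERRUPT for L1 to handle. */
    printf("virtual VMExit to L1\n");
    if ( v->ack_on_exit )
        printf("ack irq, latch vector in VM_EXIT_INTR_INFO\n");
    return true;
}

int main(void)
{
    struct vcpu_model l1 = { .in_nesting = false, .rflags_if = true };
    struct vcpu_model l2 = { .in_nesting = true, .ack_on_exit = true };
    inject_virtual_intr(&l1);
    inject_virtual_intr(&l2);
    return 0;
}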
Signed-off-by: Qing He <qing.he@xxxxxxxxx>
Signed-off-by: Eddie Dong <eddie.dong@xxxxxxxxx>
---
diff -r 4934d8db96bf xen/arch/x86/hvm/vmx/intr.c
--- a/xen/arch/x86/hvm/vmx/intr.c Wed Sep 08 22:11:52 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/intr.c Wed Sep 08 22:14:26 2010 +0800
@@ -33,6 +33,7 @@
#include <asm/hvm/support.h>
#include <asm/hvm/vmx/vmx.h>
#include <asm/hvm/vmx/vmcs.h>
+#include <asm/hvm/vmx/vvmcs.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <public/hvm/ioreq.h>
@@ -110,6 +111,103 @@
}
}
+/*
+ * Injecting interrupts for nested virtualization
+ *
+ * When injecting virtual interrupts (originating from L0), there are
+ * two major cases: within L1 context and within L2 context.
+ *
+ * 1. L1 context (in_nesting == 0)
+ *    Everything is the same as without nesting: check RFLAGS.IF to
+ *    see whether the injection can be done, and use the VMCS to
+ *    inject the interrupt.
+ *
+ * 2. L2 context (in_nesting == 1)
+ *    Cause a virtual VMExit; RFLAGS.IF is ignored, and whether to ack
+ *    the irq is decided by intr_ack_on_exit. Injection normally must
+ *    not be blocked, except during:
+ *    a. context transition
+ *       the interrupt needs to be blocked at virtual VMEntry time;
+ *    b. L2 idtv reinjection
+ *       if an L2 idtv event is handled within L0 (e.g. an L0 shadow
+ *       page fault), it needs to be reinjected without exiting to L1,
+ *       so interrupt injection must be blocked at this point as well.
+ *
+ * Unfortunately, interrupt blocking in L2 won't work with a simple
+ * intr_window_open check (which depends on L2's IF). To solve this,
+ * the following algorithm can be used:
+ *    v->arch.hvm_vmx.exec_control.VIRTUAL_INTR_PENDING now denotes
+ *    only the L0 control; the physical control may differ from it.
+ *    - if in L1, it behaves normally: the intr window is written to
+ *      the physical control as-is;
+ *    - if in L2, replace it with MTF (or the NMI window) if possible;
+ *    - if MTF/the NMI window is not used, the intr window can still
+ *      be used, but this may hurt interrupt delivery performance.
+ */
+
+static int nest_intr_blocked(struct vcpu *v, struct hvm_intack intack)
+{
+    int r = 0;
+
+    if ( !v->arch.hvm_vcpu.in_nesting &&
+         v->arch.hvm_vmx.nest.vmresume_pending )
+        r = 1;
+
+    if ( v->arch.hvm_vcpu.in_nesting )
+    {
+        if ( v->arch.hvm_vmx.nest.vmexit_pending ||
+             v->arch.hvm_vmx.nest.vmresume_in_progress ||
+             (__vmread(VM_ENTRY_INTR_INFO) & INTR_INFO_VALID_MASK) )
+            r = 1;
+    }
+
+    return r;
+}
+
+static int vmx_nest_intr_intercept(struct vcpu *v, struct hvm_intack intack)
+{
+    u32 exit_ctrl;
+
+    /*
+     * TODO:
+     *   - the case where L1's intr-window exiting == 0
+     *   - vNMI
+     */
+
+    if ( nest_intr_blocked(v, intack) )
+    {
+        enable_intr_window(v, intack);
+        return 1;
+    }
+
+    if ( v->arch.hvm_vcpu.in_nesting )
+    {
+        if ( intack.source == hvm_intsrc_pic ||
+             intack.source == hvm_intsrc_lapic )
+        {
+            vmx_inject_extint(intack.vector);
+
+            exit_ctrl = __get_vvmcs(v->arch.hvm_vmx.nest.vvmcs,
+                                    VM_EXIT_CONTROLS);
+            if ( exit_ctrl & VM_EXIT_ACK_INTR_ON_EXIT )
+            {
+                /* For now, duplicate the ack path from vmx_intr_assist(). */
+                hvm_vcpu_ack_pending_irq(v, intack);
+                pt_intr_post(v, intack);
+
+                intack = hvm_vcpu_has_pending_irq(v);
+                if ( unlikely(intack.source != hvm_intsrc_none) )
+                    enable_intr_window(v, intack);
+            }
+            else
+                enable_intr_window(v, intack);
+
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
asmlinkage void vmx_intr_assist(void)
{
struct hvm_intack intack;
@@ -133,6 +231,9 @@
if ( likely(intack.source == hvm_intsrc_none) )
goto out;
+    if ( unlikely(vmx_nest_intr_intercept(v, intack)) )
+        goto out;
+
intblk = hvm_interrupt_blocked(v, intack);
if ( intblk == hvm_intblk_tpr )
{
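Reviewer note: the block comment above proposes replacing the L0
intr-window intent with MTF while in L2; the patch itself only carries
a TODO for this in nest.c. A rough sketch of that mapping (the two
control-bit values are the architectural VMX ones; the helper itself
is hypothetical):

#include <stdbool.h>
#include <stdint.h>

#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004u
#define CPU_BASED_MONITOR_TRAP_FLAG    0x08000000u

static uint32_t physical_exec_control(uint32_t l0_ctrl, bool in_l2,
                                      bool mtf_available)
{
    /* In L1, or with no pending intr-window request, L0's intent is
     * written to the physical control as-is. */
    if ( !in_l2 || !(l0_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING) )
        return l0_ctrl;

    if ( mtf_available )
    {
        /* In L2, trap after the next instruction instead of waiting
         * for L2's RFLAGS.IF, which a virtual VMExit must ignore. */
        l0_ctrl &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
        l0_ctrl |= CPU_BASED_MONITOR_TRAP_FLAG;
    }
    /* Otherwise keep the intr window: still correct, but delivery may
     * be delayed until L2 opens its interrupt window. */
    return l0_ctrl;
}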
diff -r 4934d8db96bf xen/arch/x86/hvm/vmx/nest.c
--- a/xen/arch/x86/hvm/vmx/nest.c Wed Sep 08 22:11:52 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/nest.c Wed Sep 08 22:14:26 2010 +0800
@@ -680,6 +680,7 @@
{
struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    /* TODO: change L0 intr window to MTF or NMI window */
set_shadow_control(nest, CPU_BASED_VM_EXEC_CONTROL, value);
}
@@ -973,6 +974,33 @@
__set_vvmcs(nest->vvmcs, VM_ENTRY_INTR_INFO, 0);
}
+static void vmx_nest_intr_exit(struct vmx_nest_struct *nest)
+{
+    if ( !(nest->intr_info & INTR_INFO_VALID_MASK) )
+        return;
+
+    switch ( (nest->intr_info & INTR_INFO_INTR_TYPE_MASK) >> 8 )
+    {
+    case X86_EVENTTYPE_EXT_INTR:
+        /* Override the exit reason to EXTERNAL_INTERRUPT. */
+        __set_vvmcs(nest->vvmcs, VM_EXIT_REASON,
+                    EXIT_REASON_EXTERNAL_INTERRUPT);
+        __set_vvmcs(nest->vvmcs, EXIT_QUALIFICATION, 0);
+        __set_vvmcs(nest->vvmcs, VM_EXIT_INTR_INFO, nest->intr_info);
+        break;
+
+    case X86_EVENTTYPE_HW_EXCEPTION:
+    case X86_EVENTTYPE_SW_INTERRUPT:
+    case X86_EVENTTYPE_SW_EXCEPTION:
+        /* Throw the event to L1. */
+        __set_vvmcs(nest->vvmcs, VM_EXIT_INTR_INFO, nest->intr_info);
+        __set_vvmcs(nest->vvmcs, VM_EXIT_INTR_ERROR_CODE, nest->error_code);
+        break;
+
+    case X86_EVENTTYPE_NMI:
+    default:
+        break;
+    }
+}
+
static void virtual_vmexit(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
@@ -982,6 +1010,8 @@
#endif
sync_vvmcs_ro(nest);
+    vmx_nest_intr_exit(nest);
+
sync_vvmcs_guest_state(nest);
vmx_vmcs_switch_current(v, v->arch.hvm_vmx.vmcs, nest->hvmcs);
@@ -1043,3 +1073,39 @@
virtual_vmentry(regs);
}
}
+
+void vmx_nest_idtv_handling(void)
+{
+    struct vcpu *v = current;
+    struct vmx_nest_struct *nest = &v->arch.hvm_vmx.nest;
+    unsigned int idtv_info = __vmread(IDT_VECTORING_INFO);
+
+    if ( likely(!(idtv_info & INTR_INFO_VALID_MASK)) )
+        return;
+
+    /*
+     * If L0 can resolve the fault that caused the IDT vectoring, the
+     * event should be reinjected; otherwise, it is passed on to L1.
+     */
+    if ( (__vmread(VM_EXIT_REASON) != EXIT_REASON_EPT_VIOLATION &&
+          !(nest->intr_info & INTR_INFO_VALID_MASK)) ||
+         (__vmread(VM_EXIT_REASON) == EXIT_REASON_EPT_VIOLATION &&
+          !nest->vmexit_pending) )
+    {
+        __vmwrite(VM_ENTRY_INTR_INFO, idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+        if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                      __vmread(IDT_VECTORING_ERROR_CODE));
+        /*
+         * Per SDM 23.2.4, if L1 tries to inject a software interrupt
+         * and the delivery fails, VM_EXIT_INSTRUCTION_LEN receives
+         * the value of the previous VM_ENTRY_INSTRUCTION_LEN.
+         *
+         * This means VM_EXIT_INSTRUCTION_LEN is always valid here,
+         * for software interrupts both injected by L1 and generated
+         * in L2.
+         */
+        __vmwrite(VM_ENTRY_INSTRUCTION_LEN, __vmread(VM_EXIT_INSTRUCTION_LEN));
+    }
+
+    /* TODO: NMI */
+}
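Reviewer note: the reinjection condition in vmx_nest_idtv_handling()
above can be restated as a pure predicate (a hypothetical helper, not
part of the patch): L0 reinjects the interrupted event exactly when it
resolved the fault itself, i.e. when the exit is not forwarded to L1.

#include <stdbool.h>

static bool l0_should_reinject(bool exit_is_ept_violation,
                               bool nest_intr_info_valid, /* event queued for L1 */
                               bool nest_vmexit_pending)  /* exit goes to L1 */
{
    if ( exit_is_ept_violation )
        return !nest_vmexit_pending; /* L0 handled the EPT violation */
    return !nest_intr_info_valid;    /* no virtual VMExit synthesized */
}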
diff -r 4934d8db96bf xen/arch/x86/hvm/vmx/vmx.c
--- a/xen/arch/x86/hvm/vmx/vmx.c Wed Sep 08 22:11:52 2010 +0800
+++ b/xen/arch/x86/hvm/vmx/vmx.c Wed Sep 08 22:14:26 2010 +0800
@@ -1270,6 +1270,7 @@
{
unsigned long intr_fields;
struct vcpu *curr = current;
+    struct vmx_nest_struct *nest = &curr->arch.hvm_vmx.nest;
/*
* NB. Callers do not need to worry about clearing STI/MOV-SS blocking:
@@ -1281,11 +1282,21 @@
     intr_fields = (INTR_INFO_VALID_MASK | (type<<8) | trap);
     if ( error_code != HVM_DELIVER_NO_ERROR_CODE ) {
-        __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
         intr_fields |= INTR_INFO_DELIVER_CODE_MASK;
+        if ( curr->arch.hvm_vcpu.in_nesting )
+            nest->error_code = error_code;
+        else
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
     }
 
-    __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
+    if ( curr->arch.hvm_vcpu.in_nesting )
+    {
+        nest->intr_info = intr_fields;
+        nest->vmexit_pending = 1;
+        return;
+    }
+    else
+        __vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
/* Can't inject exceptions in virtual 8086 mode because they would
* use the protected-mode IDT. Emulate at the next vmenter instead. */
@@ -1295,9 +1306,14 @@
void vmx_inject_hw_exception(int trap, int error_code)
{
-    unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+    unsigned long intr_info;
     struct vcpu *curr = current;
 
+    if ( curr->arch.hvm_vcpu.in_nesting )
+        intr_info = curr->arch.hvm_vmx.nest.intr_info;
+    else
+        intr_info = __vmread(VM_ENTRY_INTR_INFO);
+
switch ( trap )
{
case TRAP_debug:
@@ -2287,9 +2303,31 @@
return -1;
}
+static void vmx_idtv_reinject(unsigned long idtv_info)
+{
+    if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) )
+    {
+        /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
+        __vmwrite(VM_ENTRY_INTR_INFO,
+                  idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
+        if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
+                      __vmread(IDT_VECTORING_ERROR_CODE));
+    }
+
+    /*
+     * Clear NMI-blocking interruptibility info if an NMI delivery faulted.
+     * Re-delivery will re-set it (see SDM 3B 25.7.1.2).
+     */
+    if ( (idtv_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI<<8) )
+        __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
+                  __vmread(GUEST_INTERRUPTIBILITY_INFO) &
+                  ~VMX_INTR_SHADOW_NMI);
+}
+
asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
-    unsigned int exit_reason, idtv_info, intr_info = 0, vector = 0;
+    unsigned int exit_reason, idtv_info = 0, intr_info = 0, vector = 0;
unsigned long exit_qualification, inst_len = 0;
struct vcpu *v = current;
@@ -2374,29 +2412,14 @@
hvm_maybe_deassert_evtchn_irq();
-    /* Event delivery caused this intercept? Queue for redelivery. */
-    idtv_info = __vmread(IDT_VECTORING_INFO);
-    if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) &&
-         (exit_reason != EXIT_REASON_TASK_SWITCH) )
+    /* TODO: consolidate nested idtv handling with the ordinary path */
+    if ( !v->arch.hvm_vcpu.in_nesting )
     {
-        if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) )
-        {
-            /* See SDM 3B 25.7.1.1 and .2 for info about masking resvd bits. */
-            __vmwrite(VM_ENTRY_INTR_INFO,
-                      idtv_info & ~INTR_INFO_RESVD_BITS_MASK);
-            if ( idtv_info & INTR_INFO_DELIVER_CODE_MASK )
-                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE,
-                          __vmread(IDT_VECTORING_ERROR_CODE));
-        }
-
-        /*
-         * Clear NMI-blocking interruptibility info if an NMI delivery faulted.
-         * Re-delivery will re-set it (see SDM 3B 25.7.1.2).
-         */
-        if ( (idtv_info & INTR_INFO_INTR_TYPE_MASK) == (X86_EVENTTYPE_NMI<<8) )
-            __vmwrite(GUEST_INTERRUPTIBILITY_INFO,
-                      __vmread(GUEST_INTERRUPTIBILITY_INFO) &
-                      ~VMX_INTR_SHADOW_NMI);
+        /* Event delivery caused this intercept? Queue for redelivery. */
+        idtv_info = __vmread(IDT_VECTORING_INFO);
+        if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) &&
+             (exit_reason != EXIT_REASON_TASK_SWITCH) )
+            vmx_idtv_reinject(idtv_info);
     }
}
switch ( exit_reason )
@@ -2721,6 +2744,9 @@
domain_crash(v->domain);
break;
}
+
+    if ( v->arch.hvm_vcpu.in_nesting )
+        vmx_nest_idtv_handling();
}
asmlinkage void vmx_vmenter_helper(void)
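The vmx.c changes above follow one pattern: when the vcpu is in L2,
event injection is latched into the nest structure (intr_info,
error_code, vmexit_pending) instead of being written to the physical
VMCS, and virtual_vmexit() later copies it into the vvmcs. In
isolation, with simplified stand-in types (not the actual Xen
structures):

#include <stdbool.h>
#include <stdint.h>

struct nest_model {
    uint32_t intr_info;      /* event latched for the virtual VMExit */
    uint32_t error_code;
    bool     vmexit_pending;
};

static void vmwrite_intr_info(uint32_t val) { (void)val; /* __vmwrite() */ }

static void inject_event(struct nest_model *nest, bool in_nesting,
                         uint32_t intr_fields, uint32_t error_code)
{
    if ( in_nesting )
    {
        /* Defer: the event becomes part of the virtual VMExit state. */
        nest->intr_info = intr_fields;
        nest->error_code = error_code;
        nest->vmexit_pending = true;
        return;
    }
    /* Not nested: inject directly through the physical VMCS. */
    vmwrite_intr_info(intr_fields);
}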
diff -r 4934d8db96bf xen/include/asm-x86/hvm/vmx/nest.h
--- a/xen/include/asm-x86/hvm/vmx/nest.h Wed Sep 08 22:11:52 2010 +0800
+++ b/xen/include/asm-x86/hvm/vmx/nest.h Wed Sep 08 22:14:26 2010 +0800
@@ -54,6 +54,9 @@
* with vmresume_in_progress
*/
int vmresume_in_progress;
+
+    /* Event latched by L0 for injection at the next virtual VMExit. */
+    unsigned long intr_info;
+    unsigned long error_code;
};
asmlinkage void vmx_nest_switch_mode(void);
@@ -76,4 +79,6 @@
unsigned long value);
void vmx_nest_update_exception_bitmap(struct vcpu *v, unsigned long value);
+void vmx_nest_idtv_handling(void);
+
#endif /* __ASM_X86_HVM_NEST_H__ */