|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH] x86/hvm: Add Kconfig option to disable nested virtualization
On Fri Feb 13, 2026 at 11:02 PM CET, Stefano Stabellini wrote:
> Introduce CONFIG_NESTED_VIRT (default n) to allow nested virtualization
> support to be disabled at build time. This is useful for embedded or
> safety-focused deployments where nested virtualization is not needed,
> reducing code size and attack surface.
>
> When CONFIG_NESTED_VIRT=n, the following source files are excluded:
> - arch/x86/hvm/nestedhvm.c
> - arch/x86/hvm/svm/nestedsvm.c
> - arch/x86/hvm/vmx/vvmx.c
> - arch/x86/mm/nested.c
> - arch/x86/mm/hap/nested_hap.c
> - arch/x86/mm/hap/nested_ept.c
>
> Add inline stubs where needed in headers. Guard assembly code paths
> for nested virt with #ifdef CONFIG_NESTED_VIRT. Move exception
> injection for VMX/SVM instructions to the callers in vmx.c/svm.c to
> avoid header dependency issues in the stubs.
>
> No functional change when CONFIG_NESTED_VIRT=y.
>
> Signed-off-by: Stefano Stabellini <stefano.stabellini@xxxxxxx>
>
> ---
> Changes in v3:
> - Kconfig: Change "depends on AMD_SVM || INTEL_VMX" to "depends on HVM"
> - Kconfig: Remove redundant "default n" line
> - Kconfig: Remove "If unsure, say N." from help text
> - mm/hap/Makefile: Simplify using intermediate nested-y variable:
> nested-y := nested_hap.o
> nested-$(CONFIG_INTEL_VMX) += nested_ept.o
> obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
> - svm/nestedhvm.h: Remove #ifdef CONFIG_NESTED_VIRT stubs, keep only
> function declarations (the functions are only called from code that
> is already compiled out when nested virt is disabled)
> - svm/nestedhvm.h: Add CONFIG_NESTED_VIRT guard to nsvm_efer_svm_enabled
> macro to return false when nested virt is disabled
> - svm/svm.c: Move #UD injection for STGI/CLGI to the caller instead of
> stub functions, checking nestedhvm_enabled()/nsvm_efer_svm_enabled()
> - svm/svm.c: Mark svm_vmexit_do_vmrun/vmload/vmsave as __maybe_unused
> - svm/svm.c: Remove empty nsvm_vcpu_switch stub (now guarded in asm)
> - svm/entry.S: Add #ifdef CONFIG_NESTED_VIRT guards around nested virt
> specific code paths
> - vmx/vmx.c: Remove empty nvmx_switch_guest stub (now guarded in asm)
> - vmx/vmx.c: Move nvmx_enqueue_n2_exceptions and nvmx_vmexit_event to
> vvmx.c where they belong
> - vmx/vvmx.h: Add declarations for nvmx_vmexit_event and
> nvmx_enqueue_n2_exceptions
> - vmx/vvmx.h: Fix nvmx_msr_read_intercept stub comment
> - vmx/vvmx.h: nvmx_handle_vmx_insn stub returns X86EMUL_EXCEPTION with
> ASSERT_UNREACHABLE (caller handles injection)
> - vmx/vvmx.h: Convert get_vvmcs macro to inline function in stubs
> - vmx/entry.S: Add #ifdef CONFIG_NESTED_VIRT guard around nvmx_switch_guest
> - nestedhvm.h: Convert macro stubs to proper inline functions
> ---
> xen/arch/x86/hvm/Kconfig | 7 +++
> xen/arch/x86/hvm/Makefile | 2 +-
> xen/arch/x86/hvm/svm/Makefile | 2 +-
> xen/arch/x86/hvm/svm/entry.S | 4 ++
> xen/arch/x86/hvm/svm/nestedhvm.h | 2 +-
> xen/arch/x86/hvm/svm/svm.c | 18 ++++--
> xen/arch/x86/hvm/vmx/Makefile | 2 +-
> xen/arch/x86/hvm/vmx/entry.S | 2 +
> xen/arch/x86/hvm/vmx/vmx.c | 31 +---------
> xen/arch/x86/hvm/vmx/vvmx.c | 26 +++++++++
> xen/arch/x86/include/asm/hvm/hvm.h | 2 +-
> xen/arch/x86/include/asm/hvm/nestedhvm.h | 64 +++++++++++++++++---
> xen/arch/x86/include/asm/hvm/vmx/vvmx.h | 74 ++++++++++++++++++++++++
> xen/arch/x86/mm/Makefile | 2 +-
> xen/arch/x86/mm/hap/Makefile | 5 +-
> xen/arch/x86/mm/p2m.h | 6 ++
> xen/arch/x86/sysctl.c | 2 +
> xen/include/public/sysctl.h | 4 +-
> 18 files changed, 204 insertions(+), 51 deletions(-)
>
> diff --git a/xen/arch/x86/hvm/Kconfig b/xen/arch/x86/hvm/Kconfig
> index f32bf5cbb7..af661385b5 100644
> --- a/xen/arch/x86/hvm/Kconfig
> +++ b/xen/arch/x86/hvm/Kconfig
> @@ -92,4 +92,11 @@ config MEM_SHARING
> bool "Xen memory sharing support (UNSUPPORTED)" if UNSUPPORTED
> depends on INTEL_VMX
>
> +config NESTED_VIRT
> + bool "Nested virtualization support"
> + depends on HVM
> + help
> + Enable nested virtualization, allowing guests to run their own
> + hypervisors. This requires hardware support.
nit: If we state above "allowing HVM guests..." rather than plain "guests" we
can then get rid of the "This requires hardware support" line. What you
probably meant is that this is HVM-only and we don't allow PV nesting.
"This requires hardware support" makes me (the user) think my hardware needs
something special to support nesting, when in reality I just need HVM support.
> +
> endif
> diff --git a/xen/arch/x86/hvm/Makefile b/xen/arch/x86/hvm/Makefile
> index f34fb03934..b8a0a68624 100644
> --- a/xen/arch/x86/hvm/Makefile
> +++ b/xen/arch/x86/hvm/Makefile
> @@ -18,7 +18,7 @@ obj-y += irq.o
> obj-y += mmio.o
> obj-$(CONFIG_VM_EVENT) += monitor.o
> obj-y += mtrr.o
> -obj-y += nestedhvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedhvm.o
> obj-y += pmtimer.o
> obj-y += quirks.o
> obj-y += rtc.o
> diff --git a/xen/arch/x86/hvm/svm/Makefile b/xen/arch/x86/hvm/svm/Makefile
> index 8a072cafd5..92418e3444 100644
> --- a/xen/arch/x86/hvm/svm/Makefile
> +++ b/xen/arch/x86/hvm/svm/Makefile
> @@ -2,6 +2,6 @@ obj-y += asid.o
> obj-y += emulate.o
> obj-bin-y += entry.o
> obj-y += intr.o
> -obj-y += nestedsvm.o
> +obj-$(CONFIG_NESTED_VIRT) += nestedsvm.o
> obj-y += svm.o
> obj-y += vmcb.o
> diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
> index af8db23b03..7964c80750 100644
> --- a/xen/arch/x86/hvm/svm/entry.S
> +++ b/xen/arch/x86/hvm/svm/entry.S
> @@ -28,7 +28,9 @@ FUNC(svm_asm_do_resume)
> GET_CURRENT(bx)
> .Lsvm_do_resume:
> call svm_intr_assist
> +#ifdef CONFIG_NESTED_VIRT
> call nsvm_vcpu_switch
> +#endif
> ASSERT_NOT_IN_ATOMIC
>
> mov VCPU_processor(%rbx),%eax
> @@ -39,6 +41,7 @@ FUNC(svm_asm_do_resume)
> cmp %ecx,(%rdx,%rax,1)
> jne .Lsvm_process_softirqs
>
> +#ifdef CONFIG_NESTED_VIRT
> cmp %cl,VCPU_nsvm_hap_enabled(%rbx)
> UNLIKELY_START(ne, nsvm_hap)
> cmp %rcx,VCPU_nhvm_p2m(%rbx)
> @@ -52,6 +55,7 @@ UNLIKELY_START(ne, nsvm_hap)
> sti
> jmp .Lsvm_do_resume
> __UNLIKELY_END(nsvm_hap)
> +#endif
>
> call svm_vmenter_helper
>
> diff --git a/xen/arch/x86/hvm/svm/nestedhvm.h
> b/xen/arch/x86/hvm/svm/nestedhvm.h
> index 9bfed5ffd7..5cb85410f8 100644
> --- a/xen/arch/x86/hvm/svm/nestedhvm.h
> +++ b/xen/arch/x86/hvm/svm/nestedhvm.h
> @@ -24,7 +24,7 @@
>
> /* True when l1 guest enabled SVM in EFER */
> #define nsvm_efer_svm_enabled(v) \
> - (!!((v)->arch.hvm.guest_efer & EFER_SVME))
> + (IS_ENABLED(CONFIG_NESTED_VIRT) && ((v)->arch.hvm.guest_efer &
> EFER_SVME))
>
> int nestedsvm_vmcb_map(struct vcpu *v, uint64_t vmcbaddr);
> void nestedsvm_vmexit_defer(struct vcpu *v,
> diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
> index 18ba837738..2cabc89fb5 100644
> --- a/xen/arch/x86/hvm/svm/svm.c
> +++ b/xen/arch/x86/hvm/svm/svm.c
> @@ -2165,7 +2165,7 @@ static void svm_vmexit_do_pause(struct cpu_user_regs
> *regs)
> vcpu_yield();
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmrun(struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> {
> @@ -2211,7 +2211,7 @@ nsvm_get_nvmcb_page(struct vcpu *v, uint64_t vmcbaddr)
> return page;
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
> struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> @@ -2246,7 +2246,7 @@ svm_vmexit_do_vmload(struct vmcb_struct *vmcb,
> __update_guest_eip(regs, inst_len);
> }
>
> -static void
> +static void __maybe_unused
> svm_vmexit_do_vmsave(struct vmcb_struct *vmcb,
> struct cpu_user_regs *regs,
> struct vcpu *v, uint64_t vmcbaddr)
> @@ -2465,6 +2465,7 @@ static struct hvm_function_table __initdata_cf_clobber
> svm_function_table = {
> .set_rdtsc_exiting = svm_set_rdtsc_exiting,
> .get_insn_bytes = svm_get_insn_bytes,
>
> +#ifdef CONFIG_NESTED_VIRT
> .nhvm_vcpu_initialise = nsvm_vcpu_initialise,
> .nhvm_vcpu_destroy = nsvm_vcpu_destroy,
> .nhvm_vcpu_reset = nsvm_vcpu_reset,
> @@ -2474,6 +2475,7 @@ static struct hvm_function_table __initdata_cf_clobber
> svm_function_table = {
> .nhvm_vmcx_hap_enabled = nsvm_vmcb_hap_enabled,
> .nhvm_intr_blocked = nsvm_intr_blocked,
> .nhvm_hap_walk_L1_p2m = nsvm_hap_walk_L1_p2m,
> +#endif
>
> .get_reg = svm_get_reg,
> .set_reg = svm_set_reg,
> @@ -3011,10 +3013,16 @@ void asmlinkage svm_vmexit_handler(void)
> svm_vmexit_do_vmsave(vmcb, regs, v, regs->rax);
> break;
> case VMEXIT_STGI:
> - svm_vmexit_do_stgi(regs, v);
> + if ( !nestedhvm_enabled(v->domain) )
> + hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> + else
> + svm_vmexit_do_stgi(regs, v);
> break;
> case VMEXIT_CLGI:
> - svm_vmexit_do_clgi(regs, v);
> + if ( !nsvm_efer_svm_enabled(v) )
> + hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
> + else
> + svm_vmexit_do_clgi(regs, v);
> break;
nit: For readability I'd consider reversing the polarity and putting the enabled
cases in the first branch.
>
> case VMEXIT_XSETBV:
> diff --git a/xen/arch/x86/hvm/vmx/Makefile b/xen/arch/x86/hvm/vmx/Makefile
> index 04a29ce59d..902564b3e2 100644
> --- a/xen/arch/x86/hvm/vmx/Makefile
> +++ b/xen/arch/x86/hvm/vmx/Makefile
> @@ -3,4 +3,4 @@ obj-y += intr.o
> obj-y += realmode.o
> obj-y += vmcs.o
> obj-y += vmx.o
> -obj-y += vvmx.o
> +obj-$(CONFIG_NESTED_VIRT) += vvmx.o
> diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
> index 2bfee715b3..4d62efddf4 100644
> --- a/xen/arch/x86/hvm/vmx/entry.S
> +++ b/xen/arch/x86/hvm/vmx/entry.S
> @@ -83,7 +83,9 @@ FUNC(vmx_asm_vmexit_handler)
>
> .Lvmx_do_vmentry:
> call vmx_intr_assist
> +#ifdef CONFIG_NESTED_VIRT
> call nvmx_switch_guest
> +#endif
> ASSERT_NOT_IN_ATOMIC
>
> mov VCPU_processor(%rbx),%eax
> diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
> index 82c55f49ae..4e3c8018d2 100644
> --- a/xen/arch/x86/hvm/vmx/vmx.c
> +++ b/xen/arch/x86/hvm/vmx/vmx.c
> @@ -2014,33 +2014,6 @@ static void cf_check vmx_update_guest_efer(struct vcpu
> *v)
> vmx_set_msr_intercept(v, MSR_EFER, VMX_MSR_R);
> }
>
> -static void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> - unsigned long intr_fields, int error_code, uint8_t source)
> -{
> - struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> -
> - if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
> - /* enqueue the exception till the VMCS switch back to L1 */
> - nvmx->intr.intr_info = intr_fields;
> - nvmx->intr.error_code = error_code;
> - nvmx->intr.source = source;
> - vcpu_nestedhvm(v).nv_vmexit_pending = 1;
> - return;
> - }
> - else
> - gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
> - "on %lx %x\n", intr_fields, error_code,
> - nvmx->intr.intr_info, nvmx->intr.error_code);
> -}
> -
> -static int cf_check nvmx_vmexit_event(
> - struct vcpu *v, const struct x86_event *event)
> -{
> - nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> - hvm_intsrc_none);
> - return NESTEDHVM_VMEXIT_DONE;
> -}
> -
> static void __vmx_inject_exception(int trap, int type, int error_code)
> {
> unsigned long intr_fields;
> @@ -2933,6 +2906,7 @@ static struct hvm_function_table __initdata_cf_clobber
> vmx_function_table = {
> .handle_cd = vmx_handle_cd,
> .set_info_guest = vmx_set_info_guest,
> .set_rdtsc_exiting = vmx_set_rdtsc_exiting,
> +#ifdef CONFIG_NESTED_VIRT
> .nhvm_vcpu_initialise = nvmx_vcpu_initialise,
> .nhvm_vcpu_destroy = nvmx_vcpu_destroy,
> .nhvm_vcpu_reset = nvmx_vcpu_reset,
> @@ -2942,8 +2916,9 @@ static struct hvm_function_table __initdata_cf_clobber
> vmx_function_table = {
> .nhvm_vcpu_vmexit_event = nvmx_vmexit_event,
> .nhvm_intr_blocked = nvmx_intr_blocked,
> .nhvm_domain_relinquish_resources = nvmx_domain_relinquish_resources,
> - .update_vlapic_mode = vmx_vlapic_msr_changed,
> .nhvm_hap_walk_L1_p2m = nvmx_hap_walk_L1_p2m,
> +#endif
> + .update_vlapic_mode = vmx_vlapic_msr_changed,
> #ifdef CONFIG_VM_EVENT
> .enable_msr_interception = vmx_enable_msr_interception,
> #endif
> diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c
> index 38952f0696..2bb42678c5 100644
> --- a/xen/arch/x86/hvm/vmx/vvmx.c
> +++ b/xen/arch/x86/hvm/vmx/vvmx.c
> @@ -2821,6 +2821,32 @@ void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned
> int cr)
> __vmwrite(read_shadow_field, v->arch.hvm.nvcpu.guest_cr[cr]);
> }
>
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source)
> +{
> + struct nestedvmx *nvmx = &vcpu_2_nvmx(v);
> +
> + if ( !(nvmx->intr.intr_info & INTR_INFO_VALID_MASK) ) {
> + /* enqueue the exception till the VMCS switch back to L1 */
> + nvmx->intr.intr_info = intr_fields;
> + nvmx->intr.error_code = error_code;
> + nvmx->intr.source = source;
> + vcpu_nestedhvm(v).nv_vmexit_pending = 1;
> + return;
> + }
> + else
> + gdprintk(XENLOG_ERR, "Double Fault on Nested Guest: exception %lx %x"
> + "on %lx %x\n", intr_fields, error_code,
> + nvmx->intr.intr_info, nvmx->intr.error_code);
> +}
> +
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event *event)
> +{
> + nvmx_enqueue_n2_exceptions(v, event->vector, event->error_code,
> + hvm_intsrc_none);
> + return NESTEDHVM_VMEXIT_DONE;
> +}
> +
> void __init start_nested_vmx(struct hvm_function_table *hvm_function_table)
> {
> /* TODO: Require hardware support before enabling nested virt */
> diff --git a/xen/arch/x86/include/asm/hvm/hvm.h
> b/xen/arch/x86/include/asm/hvm/hvm.h
> index 7d9774df59..536a38b450 100644
> --- a/xen/arch/x86/include/asm/hvm/hvm.h
> +++ b/xen/arch/x86/include/asm/hvm/hvm.h
> @@ -711,7 +711,7 @@ static inline bool hvm_altp2m_supported(void)
> /* Returns true if we have the minimum hardware requirements for nested virt
> */
> static inline bool hvm_nested_virt_supported(void)
> {
> - return hvm_funcs.caps.nested_virt;
> + return IS_ENABLED(CONFIG_NESTED_VIRT) && hvm_funcs.caps.nested_virt;
> }
>
> #ifdef CONFIG_ALTP2M
> diff --git a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> index ea2c1bc328..2f8209271a 100644
> --- a/xen/arch/x86/include/asm/hvm/nestedhvm.h
> +++ b/xen/arch/x86/include/asm/hvm/nestedhvm.h
> @@ -25,9 +25,21 @@ enum nestedhvm_vmexits {
> /* Nested HVM on/off per domain */
> static inline bool nestedhvm_enabled(const struct domain *d)
> {
> - return IS_ENABLED(CONFIG_HVM) && (d->options &
> XEN_DOMCTL_CDF_nested_virt);
> + return IS_ENABLED(CONFIG_NESTED_VIRT) &&
> + (d->options & XEN_DOMCTL_CDF_nested_virt);
> }
>
> +/* Nested paging */
> +#define NESTEDHVM_PAGEFAULT_DONE 0
> +#define NESTEDHVM_PAGEFAULT_INJECT 1
> +#define NESTEDHVM_PAGEFAULT_L1_ERROR 2
> +#define NESTEDHVM_PAGEFAULT_L0_ERROR 3
> +#define NESTEDHVM_PAGEFAULT_MMIO 4
> +#define NESTEDHVM_PAGEFAULT_RETRY 5
> +#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> +
> +#ifdef CONFIG_NESTED_VIRT
> +
> /* Nested VCPU */
> int nestedhvm_vcpu_initialise(struct vcpu *v);
> void nestedhvm_vcpu_destroy(struct vcpu *v);
> @@ -38,14 +50,6 @@ bool nestedhvm_vcpu_in_guestmode(struct vcpu *v);
> #define nestedhvm_vcpu_exit_guestmode(v) \
> vcpu_nestedhvm(v).nv_guestmode = 0
>
> -/* Nested paging */
> -#define NESTEDHVM_PAGEFAULT_DONE 0
> -#define NESTEDHVM_PAGEFAULT_INJECT 1
> -#define NESTEDHVM_PAGEFAULT_L1_ERROR 2
> -#define NESTEDHVM_PAGEFAULT_L0_ERROR 3
> -#define NESTEDHVM_PAGEFAULT_MMIO 4
> -#define NESTEDHVM_PAGEFAULT_RETRY 5
> -#define NESTEDHVM_PAGEFAULT_DIRECT_MMIO 6
> int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
> struct npfec npfec);
>
> @@ -59,6 +63,48 @@ unsigned long *nestedhvm_vcpu_iomap_get(bool ioport_80,
> bool ioport_ed);
>
> void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m);
>
> +#else /* !CONFIG_NESTED_VIRT */
There are a lot more stubs than needed here.
> +
> +static inline int nestedhvm_vcpu_initialise(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> + return -EOPNOTSUPP;
> +}
Can remove.
> +static inline void nestedhvm_vcpu_destroy(struct vcpu *v) { }
Must stay.
> +static inline void nestedhvm_vcpu_reset(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> +}
Can remove.
> +static inline bool nestedhvm_vcpu_in_guestmode(struct vcpu *v) { return
> false; }
Must stay.
> +static inline int nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t
> *L2_gpa,
> + struct npfec npfec)
> +{
> + ASSERT_UNREACHABLE();
> + return NESTEDHVM_PAGEFAULT_L0_ERROR;
> +}
Can remove
> +static inline void nestedhvm_vcpu_enter_guestmode(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline void nestedhvm_vcpu_exit_guestmode(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> +}
These two can be removed. It might be good to keep the real macros hidden under
CONFIG_NESTED_VIRT though to ensure they can't be called.
> +static inline bool nestedhvm_paging_mode_hap(struct vcpu *v)
> +{
> + return false;
> +}
This can be removed with a cleaner IS_ENABLED() check in nhvm_vmcx_hap_enabled()
> +static inline bool nestedhvm_vmswitch_in_progress(struct vcpu *v)
> +{
> + return false;
> +}
Would be cleaner with an IS_ENABLED() check in the macro itself, IMO.
> +static inline void nestedhvm_vmcx_flushtlb(struct p2m_domain *p2m)
> +{
> + ASSERT_UNREACHABLE();
> +}
Can remove.
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
> static inline bool nestedhvm_is_n2(struct vcpu *v)
> {
> if ( !nestedhvm_enabled(v->domain) ||
> diff --git a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> index da10d3fa96..d0c1ae29f6 100644
> --- a/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
> +++ b/xen/arch/x86/include/asm/hvm/vmx/vvmx.h
Most stubs in this header aren't needed.
You only need nvmx_cpu_up_prepare(), nvmx_cpu_dead(),
nvmx_msr_read_intercept() and nvmx_handle_vmx_insn(). There's also a bug in
that last one...
> @@ -73,6 +73,8 @@ union vmx_inst_info {
> u32 word;
> };
>
> +#ifdef CONFIG_NESTED_VIRT
> +
> int cf_check nvmx_vcpu_initialise(struct vcpu *v);
> void cf_check nvmx_vcpu_destroy(struct vcpu *v);
> int cf_check nvmx_vcpu_reset(struct vcpu *v);
> @@ -199,5 +201,77 @@ int nept_translate_l2ga(struct vcpu *v, paddr_t l2ga,
> uint64_t *exit_qual, uint32_t *exit_reason);
> int nvmx_cpu_up_prepare(unsigned int cpu);
> void nvmx_cpu_dead(unsigned int cpu);
> +int cf_check nvmx_vmexit_event(struct vcpu *v, const struct x86_event
> *event);
> +void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source);
> +
> +#else /* !CONFIG_NESTED_VIRT */
> +
> +static inline void nvmx_update_exec_control(struct vcpu *v, u32 value)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_secondary_exec_control(struct vcpu *v,
> + unsigned long value)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline void nvmx_update_exception_bitmap(struct vcpu *v,
> + unsigned long value)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline u64 nvmx_get_tsc_offset(struct vcpu *v)
> +{
> + ASSERT_UNREACHABLE();
> + return 0;
> +}
> +static inline void nvmx_set_cr_read_shadow(struct vcpu *v, unsigned int cr)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline bool nvmx_intercepts_exception(struct vcpu *v, unsigned int
> vector,
> + int error_code)
> +{
> + ASSERT_UNREACHABLE();
> + return false;
> +}
> +static inline int nvmx_n2_vmexit_handler(struct cpu_user_regs *regs,
> + unsigned int exit_reason)
> +{
> + ASSERT_UNREACHABLE();
> + return 0;
> +}
> +static inline void nvmx_idtv_handling(void)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +static inline int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content)
> +{
> + /* return 0 to trigger #GP */
> + return 0;
> +}
> +static inline int nvmx_handle_vmx_insn(struct cpu_user_regs *regs,
> + unsigned int exit_reason)
> +{
> + ASSERT_UNREACHABLE();
> + return X86EMUL_EXCEPTION;
> +}
... here. This is perfectly reachable and will cause a hypervisor crash should
an L1 try to probe the VMX family of instructions. Even on release builds this
would behave very oddly because the #UD injection is missing. This stub should be:
hvm_inject_hw_exception(X86_EXC_UD, X86_EVENT_NO_EC);
return X86EMUL_EXCEPTION;
> +static inline int nvmx_cpu_up_prepare(unsigned int cpu) { return 0; }
> +static inline void nvmx_cpu_dead(unsigned int cpu) { }
> +static inline void nvmx_enqueue_n2_exceptions(struct vcpu *v,
> + unsigned long intr_fields, int error_code, uint8_t source)
> +{
> + ASSERT_UNREACHABLE();
> +}
> +
> +static inline u64 get_vvmcs(const struct vcpu *vcpu, u32 encoding)
> +{
> + ASSERT_UNREACHABLE();
> + return 0;
> +}
> +
> +#endif /* CONFIG_NESTED_VIRT */
> +
> #endif /* __ASM_X86_HVM_VVMX_H__ */
>
> diff --git a/xen/arch/x86/mm/Makefile b/xen/arch/x86/mm/Makefile
> index 960f6e8409..aa15811c2e 100644
> --- a/xen/arch/x86/mm/Makefile
> +++ b/xen/arch/x86/mm/Makefile
> @@ -7,7 +7,7 @@ obj-$(CONFIG_SHADOW_PAGING) += guest_walk_4.o
> obj-$(CONFIG_VM_EVENT) += mem_access.o
> obj-$(CONFIG_MEM_PAGING) += mem_paging.o
> obj-$(CONFIG_MEM_SHARING) += mem_sharing.o
> -obj-$(CONFIG_HVM) += nested.o
> +obj-$(CONFIG_NESTED_VIRT) += nested.o
> obj-$(CONFIG_HVM) += p2m.o
> obj-y += p2m-basic.o
> obj-$(CONFIG_INTEL_VMX) += p2m-ept.o
> diff --git a/xen/arch/x86/mm/hap/Makefile b/xen/arch/x86/mm/hap/Makefile
> index 67c29b2162..efdc91ea82 100644
> --- a/xen/arch/x86/mm/hap/Makefile
> +++ b/xen/arch/x86/mm/hap/Makefile
> @@ -2,5 +2,6 @@ obj-y += hap.o
> obj-y += guest_walk_2.o
> obj-y += guest_walk_3.o
> obj-y += guest_walk_4.o
> -obj-y += nested_hap.o
> -obj-$(CONFIG_INTEL_VMX) += nested_ept.o
> +nested-y := nested_hap.o
> +nested-$(CONFIG_INTEL_VMX) += nested_ept.o
> +obj-$(CONFIG_NESTED_VIRT) += $(nested-y)
Why not use plain filter?
-obj-y += nested_hap.o
+obj-$(CONFIG_NESTED_VIRT) += nested_hap.o
-obj-$(CONFIG_INTEL_VMX) += nested_ept.o
+obj-$(filter $(CONFIG_NESTED_VIRT),$(CONFIG_INTEL_VMX)) += nested_ept.o
> diff --git a/xen/arch/x86/mm/p2m.h b/xen/arch/x86/mm/p2m.h
> index 635f5a7f45..63808dddcc 100644
> --- a/xen/arch/x86/mm/p2m.h
> +++ b/xen/arch/x86/mm/p2m.h
> @@ -25,9 +25,15 @@ void p2m_teardown_altp2m(struct domain *d);
> void p2m_flush_table_locked(struct p2m_domain *p2m);
> int __must_check p2m_remove_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t
> mfn,
> unsigned int page_order);
> +#ifdef CONFIG_NESTED_VIRT
> void p2m_nestedp2m_init(struct p2m_domain *p2m);
> int p2m_init_nestedp2m(struct domain *d);
> void p2m_teardown_nestedp2m(struct domain *d);
> +#else
> +static inline void p2m_nestedp2m_init(struct p2m_domain *p2m) { }
> +static inline int p2m_init_nestedp2m(struct domain *d) { return 0; }
> +static inline void p2m_teardown_nestedp2m(struct domain *d) { }
> +#endif
Seeing how there's a single callsite I'd rather see those callsites check for
IS_ENABLED(), I think.
>
> int ept_p2m_init(struct p2m_domain *p2m);
> void ept_p2m_uninit(struct p2m_domain *p2m);
> diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
> index 1b04947516..b1d865e1c8 100644
> --- a/xen/arch/x86/sysctl.c
> +++ b/xen/arch/x86/sysctl.c
> @@ -103,6 +103,8 @@ void arch_do_physinfo(struct xen_sysctl_physinfo *pi)
> pi->capabilities |= XEN_SYSCTL_PHYSCAP_hap;
> if ( IS_ENABLED(CONFIG_SHADOW_PAGING) )
> pi->capabilities |= XEN_SYSCTL_PHYSCAP_shadow;
> + if ( hvm_nested_virt_supported() )
> + pi->capabilities |= XEN_SYSCTL_PHYSCAP_nestedhvm;
> }
>
> long arch_do_sysctl(
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 66c9b65465..b4bd1dd7b2 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -100,9 +100,11 @@ struct xen_sysctl_tbuf_op {
> /* Xen supports the Grant v1 and/or v2 ABIs. */
> #define XEN_SYSCTL_PHYSCAP_gnttab_v1 (1u << 8)
> #define XEN_SYSCTL_PHYSCAP_gnttab_v2 (1u << 9)
> +/* The platform supports nested HVM. */
> +#define XEN_SYSCTL_PHYSCAP_nestedhvm (1u << 10)
>
> /* Max XEN_SYSCTL_PHYSCAP_* constant. Used for ABI checking. */
> -#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_gnttab_v2
> +#define XEN_SYSCTL_PHYSCAP_MAX XEN_SYSCTL_PHYSCAP_nestedhvm
>
> #if defined(__arm__) || defined(__aarch64__)
> #define XEN_SYSCTL_PHYSCAP_ARM_SVE_MASK (0x1FU)
Cheers,
Alejandro
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |