%patch Index: xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ xen-unstable.hg/xen/include/asm-x86/hvm/hvm_extensions.h 2008-03-26 13:56:39.000000000 -0400 @@ -0,0 +1,166 @@ +/**************************************************************************** + | + | Copyright (c) [2007, 2008] Novell, Inc. + | All Rights Reserved. + | + | This program is free software; you can redistribute it and/or + | modify it under the terms of version 2 of the GNU General Public License as + | published by the Free Software Foundation. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, contact Novell, Inc. + | + | To contact Novell about this file by physical or electronic mail, + | you may find current contact information at www.novell.com + | + |*************************************************************************** +*/ + +/* + * hvm_extensions.h + * Implement Hyperv extensions. + * Engineering Contact: K. Y. 
Srinivasan + */ + +#ifndef HVM_EXTENSION_H +#define HVM_EXTENSION_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int +hyperv_dom_create(struct domain *d); +void +hyperv_dom_destroy(struct domain *d); +int +hyperv_vcpu_initialize(struct vcpu *v); +void +hyperv_vcpu_up(struct vcpu *v); +void +hyperv_vcpu_destroy(struct vcpu *v); +int +hyperv_do_cpu_id(uint32_t input, struct cpu_user_regs *regs); +int +hyperv_do_rd_msr(uint32_t idx, struct cpu_user_regs *regs); +int +hyperv_do_wr_msr(uint32_t idx, struct cpu_user_regs *regs); +int +hyperv_do_hypercall(struct cpu_user_regs *pregs); +void +hyperv_do_migrate_timers(struct vcpu *v); +int +hyperv_initialize(struct domain *d); + + + + +static inline int +hyperx_intercept_domain_create(struct domain *d) +{ + if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) { + return(hyperv_dom_create(d)); + } + return (0); +} + +static inline void +hyperx_intercept_domain_destroy(struct domain *d) +{ + if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) { + hyperv_dom_destroy(d); + } +} + +static inline int +hyperx_intercept_vcpu_initialize(struct vcpu *v) +{ + struct domain *d = v->domain; + if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) { + return(hyperv_vcpu_initialize(v)); + } + return (0); +} + + +static inline void +hyperx_intercept_vcpu_up(struct vcpu *v) +{ + struct domain *d = current->domain; + if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) { + hyperv_vcpu_up(v); + } +} + +static inline void +hyperx_intercept_vcpu_destroy(struct vcpu *v) +{ + struct domain *d = v->domain; + if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) { + hyperv_vcpu_destroy(v); + } +} + +static inline int +hyperx_intercept_do_cpuid(uint32_t idx, struct cpu_user_regs *regs) +{ + struct domain *d = current->domain; + if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) { + return(hyperv_do_cpu_id(idx, regs)); + } + return (0); 
+}
+
+static inline int
+hyperx_intercept_do_msr_read(uint32_t idx, struct cpu_user_regs *regs)
+{
+    struct domain *d = current->domain;
+    if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) {
+        return(hyperv_do_rd_msr(idx, regs));
+    }
+    return (0);
+}
+static inline int
+hyperx_intercept_do_msr_write(uint32_t idx, struct cpu_user_regs *regs)
+{
+    struct domain *d = current->domain;
+    if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) {
+        return(hyperv_do_wr_msr(idx, regs));
+    }
+    return (0);
+}
+
+static inline int
+hyperx_intercept_do_hypercall(struct cpu_user_regs *regs)
+{
+    struct domain *d = current->domain;
+    if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) {
+        return(hyperv_do_hypercall(regs));
+    }
+    return (0);
+}
+
+static inline void
+hyperx_intercept_do_migrate_timers(struct vcpu *v)
+{
+    struct domain *d = current->domain; /* NOTE(review): should this be v->domain? confirm callers always run on v */
+    if (d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] ==1) {
+        hyperv_do_migrate_timers(v); /* fix: 'return (void-expr);' is not valid ISO C in a void function */
+    }
+}
+
+
+int hyperx_initialize(struct domain *d);
+
+#endif
Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/Makefile
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/Makefile	2008-03-26 13:56:39.000000000 -0400
@@ -0,0 +1,2 @@
+obj-y += hv_intercept.o
+obj-y += hv_hypercall.o
Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_errno.h
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_errno.h	2008-03-26 13:56:39.000000000 -0400
@@ -0,0 +1,62 @@
+/****************************************************************************
+ |
+ | Copyright (c) [2007, 2008] Novell, Inc.
+ | All Rights Reserved.
+ | + | This program is free software; you can redistribute it and/or + | modify it under the terms of version 2 of the GNU General Public License as + | published by the Free Software Foundation. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, contact Novell, Inc. + | + | To contact Novell about this file by physical or electronic mail, + | you may find current contact information at www.novell.com + | + |*************************************************************************** +*/ + +/* + * hv_errno.h + * Error codes for the Novell Shim. + * + * Engineering Contact: K. Y. Srinivasan + */ + +#ifndef HV_ERRNO_H +#define HV_ERRNO_H + +#define HV_STATUS_SUCCESS 0x0000 +#define HV_STATUS_INVALID_HYPERCALL_CODE 0x0002 +#define HV_STATUS_INVALID_HYPERCALL_INPUT 0x0003 +#define HV_STATUS_INVALID_ALIGNMENT 0x0004 +#define HV_STATUS_INVALID_PARAMETER 0x0005 +#define HV_STATUS_ACCESS_DENIED 0x0006 +#define HV_STATUS_INVALID_PARTITION_STATE 0x0007 +#define HV_STATUS_OPERATION_DENIED 0x0008 +#define HV_STATUS_UNKNOWN_PROPERTY 0x0009 +#define HV_STATUS_PROPERTY_VALUE_OUT_OF_RANGE 0x000A +#define HV_STATUS_INSUFFICIENT_MEMORY 0x000B +#define HV_STATUS_PARTITION_TOO_DEEP 0x000C +#define HV_STATUS_INVALID_PARTITION_ID 0x000D +#define HV_STATUS_INVALID_VP_INDEX 0x000E +#define HV_STATUS_UNABLE_TO_RESTORE_STATE 0x000F +#define HV_STATUS_NOT_FOUND 0x0010 +#define HV_STATUS_INVALID_PORT_ID 0x0011 +#define HV_STATUS_INVALID_CONNECTION_ID 0x0012 +#define HV_STATUS_INSUFFICIENT_BUFFERS 0x0013 +#define HV_STATUS_NOT_ACKNOWLEDGED 0x0014 +#define HV_STATUS_INVALID_VP_STATE 0x0015 +#define HV_STATUS_ACKNOWLEDGED 0x0016 +#define HV_STATUS_INVALID_SAVE_RESTORE_STATE 0x0017 
+#define HV_STATUS_NO_MEMORY_4PAGES 0x0100 +#define HV_STATUS_NO_MEMORY_16PAGES 0x0101 +#define HV_STATUS_NO_MEMORY_64PAGES 0x0102 +#define HV_STATUS_NO_MEMORY_256PAGES 0x0103 +#define HV_STATUS_NO_MEMORY_1024PAGES 0x0104 +#endif Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.c 2008-03-26 14:18:32.000000000 -0400 @@ -0,0 +1,747 @@ +/**************************************************************************** + | + | Copyright (c) [2007, 2008] Novell, Inc. + | All Rights Reserved. + | + | This program is free software; you can redistribute it and/or + | modify it under the terms of version 2 of the GNU General Public License as + | published by the Free Software Foundation. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, contact Novell, Inc. + | + | To contact Novell about this file by physical or electronic mail, + | you may find current contact information at www.novell.com + | + |*************************************************************************** +*/ + +/* + * nshypercall.c. + * This file implements the hypercall component of the hyperv Shim. + * + * Engineering Contact: K. Y. 
Srinivasan + */ + +#include +#include +#include +#include +#include +#include + +#include +#include "hv_shim.h" +#include "hv_errno.h" +#include "hv_hypercall.h" + + +void hv_collect_stats(int event, hv_vcpu_stats_t *statsp) +{ + switch (event) { + case HV_CSWITCH: + statsp->num_switches++; + return; + case HV_FLUSH_VA: + statsp->num_flushes++; + return; + case HV_FLUSH_RANGE: + statsp->num_flush_ranges++; + return; + case HV_FLUSH_VA_POSTED: + statsp->num_flushes_posted++; + return; + case HV_FLUSH_RANGE_POSTED: + statsp->num_flush_ranges_posted++; + return; + case HV_TPR_READ: + statsp->num_tpr_reads++; + return; + case HV_ICR_READ: + statsp->num_icr_reads++; + return; + case HV_TPR_WRITE: + statsp->num_tpr_writes++; + return; + case HV_ICR_WRITE: + statsp->num_icr_writes++; + return; + case HV_EOI_WRITE: + statsp->num_eoi_writes++; + return; + + case HV_GFS_ACQUIRE: + statsp->num_gfs_acquires++; + return; + case HV_GFS_RELEASE: + statsp->num_gfs_releases++; + return; + case HV_TLB_FLUSH: + statsp->num_tlb_flushes++; + return; + case HV_INVL_PG: + statsp->num_invl_pages++; + return; + case HV_TIMEOUTS: + statsp->num_time_outs++; + return; + } +} + +void +hv_print_stats(hv_partition_t *curp, int i) +{ + hv_vcpu_t *v; + v = &curp->vcpu_state[i]; + printk("Printing stats for vcpu ID: %d\n", i); + + printk("Number of context switches: %lu\n", v->stats.num_switches); + printk("Number of flushes: %lu\n", v->stats.num_flushes); + printk("Number of flushes posted: %lu\n", v->stats.num_flushes_posted); + printk("Number of flush ranges: %lu\n", v->stats.num_flush_ranges); + printk("Number of flush ranges posted: %lu\n", + v->stats.num_flush_ranges_posted); + printk("Number of TPR reads: %lu\n", v->stats.num_tpr_reads); + printk("Number of ICR reads: %lu\n", v->stats.num_icr_reads); + printk("Number of Eoi writes: %lu\n", v->stats.num_eoi_writes); + printk("Number of Tpr writes: %lu\n", v->stats.num_tpr_writes); + printk("Number of Icr writes: %lu\n", 
v->stats.num_icr_writes);
+    printk("Number of GFS acquires: %lu\n", v->stats.num_gfs_acquires);
+    printk("Number of GFS releases: %lu\n", v->stats.num_gfs_releases);
+    printk("Number of TLB flushes: %lu\n", v->stats.num_tlb_flushes);
+    printk("Number of INVLPG flushes: %lu\n", v->stats.num_invl_pages);
+    printk("Number of TIMEOUTS: %lu\n", v->stats.num_time_outs);
+
+}
+
+
+
+static int
+hv_get_vp_registers(paddr_t input, paddr_t output)
+{
+    hv_vcpu_t *vcpup, *targetp;
+    hv_partition_t *curp = hv_get_current_partition();
+    get_vp_registers_input_t *inbuf;
+    get_vp_registers_output_t *outbuf;
+    struct vcpu_guest_context *vcpu_ctx;
+    u32 *reg_indexp;
+    get_vp_registers_output_t *out_regp;
+    u32 num_output_bytes = 0;
+
+    vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()];
+    inbuf = vcpup->input_buffer;
+    outbuf = vcpup->output_buffer;
+    out_regp = outbuf;
+    /*
+     * Copy the input data to the per-cpu input buffer.
+     * This may be an overkill; obviously it is better to only
+     * copy what we need. XXXKYS: Check with Mike.
+     */
+    if (hvm_copy_from_guest_phys(inbuf, input, PAGE_SIZE)) {
+        return (HV_STATUS_INVALID_ALIGNMENT);
+    }
+    /*
+     * If the partition ID specified does not match with the current
+     * domain return appropriate error.
+     */
+    if ((u64)current->domain->domain_id != inbuf->partition_id) {
+        return (HV_STATUS_ACCESS_DENIED);
+    }
+    if (inbuf->vp_index >= MAX_VIRT_CPUS) { /* fix: >= — valid indices are 0..MAX_VIRT_CPUS-1 */
+        return (HV_STATUS_INVALID_VP_INDEX);
+    }
+    targetp = &curp->vcpu_state[inbuf->vp_index];
+    if (!(targetp->flags & HV_VCPU_UP)) {
+        return (HV_STATUS_INVALID_VP_STATE);
+    }
+    if ((vcpu_ctx =
+        xmalloc_bytes(sizeof(struct vcpu_guest_context)))
+        == NULL) {
+        return (HV_STATUS_INSUFFICIENT_MEMORY);
+    }
+
+    /*
+     * Get the register state of the specified vcpu.
+ */ + if (current->vcpu_id != inbuf->vp_index) { + vcpu_pause(targetp->xen_vcpu); + } + arch_get_info_guest(targetp->xen_vcpu, vcpu_ctx); + if (current->vcpu_id != inbuf->vp_index) { + vcpu_unpause(targetp->xen_vcpu); + } + /* + * Now that we have the register state; select what we want and + * populate the output buffer. + */ + reg_indexp = &inbuf->reg_index; + while (*reg_indexp != 0) { + switch (*reg_indexp) { + /* + * XXXKYS: need mapping code here; populate + * outbuf. + */ + panic("hv_get_vp_registers not supported\n"); + } + reg_indexp++; + out_regp++ ; /*128 bit registers */ + num_output_bytes +=16; + if ((char *)reg_indexp > ((char *)inbuf + PAGE_SIZE)) { + /* + *input list not reminated correctly; bail out. + */ + panic("hv_get_vp_registers:input list not terminated\n"); + break; + } + } + if (hvm_copy_to_guest_phys(output, outbuf, num_output_bytes)) { + /* Some problem copying data out*/ + panic("hv_get_vp_registers:copyout problem\n"); + } + xfree(vcpu_ctx); + return (HV_STATUS_SUCCESS); +} + +static int +hv_set_vp_registers(paddr_t input, paddr_t output) +{ + hv_vcpu_t *vcpup, *targetp; + hv_partition_t *curp = hv_get_current_partition(); + set_vp_registers_input_t *inbuf; + struct vcpu_guest_context *vcpu_ctx; + set_vp_register_spec_t *reg_indexp; + int ret_val = HV_STATUS_SUCCESS; + + vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()]; + inbuf = vcpup->input_buffer; + /* + * Copy the input data to the per-cpu input buffer. + * This may be an overkill; obviously it is better to only + * copy what we need. XXXKYS: Check with Mike. + */ + if (hvm_copy_from_guest_phys(inbuf, input, PAGE_SIZE)) { + return (HV_STATUS_INVALID_ALIGNMENT); + } + /* + * If the partition ID specified does not match with the current + * domain return appropriate error. 
+     */
+    if ((u64)current->domain->domain_id != inbuf->partition_id) {
+        return (HV_STATUS_ACCESS_DENIED);
+    }
+    if (inbuf->vp_index >= MAX_VIRT_CPUS) { /* fix: >= — valid indices are 0..MAX_VIRT_CPUS-1 */
+        return (HV_STATUS_INVALID_VP_INDEX);
+    }
+    targetp = &curp->vcpu_state[inbuf->vp_index];
+    if (!(targetp->flags & HV_VCPU_UP)) {
+        return (HV_STATUS_INVALID_VP_STATE);
+    }
+    if ((vcpu_ctx =
+        xmalloc_bytes(sizeof(struct vcpu_guest_context)))
+        == NULL) {
+        return (HV_STATUS_INSUFFICIENT_MEMORY);
+    }
+    /*
+     * XXXKYS: Is it sufficient to just pause the target vcpu; on the
+     * xen side domain is paused for this call. CHECK.
+     */
+    if (current->vcpu_id != inbuf->vp_index) {
+        vcpu_pause(targetp->xen_vcpu);
+    }
+
+    arch_get_info_guest(targetp->xen_vcpu, vcpu_ctx);
+    /*
+     * Now that we have the register state; update the register state
+     * based on what we are given.
+     */
+    reg_indexp = &inbuf->reg_spec;
+    /*
+     * XXXKYS: Assuming the list is terminated by a reg_name that is 0.
+     * Check with Mike.
+     */
+    while (reg_indexp->reg_name != 0) {
+        switch (reg_indexp->reg_name) {
+        default: /* fix: without a label the panic below was unreachable dead code */
+            /*
+             * XXXKYS: need mapping code here; populate vcpu_ctx.
+             */
+            panic("hv_set_vp_registers not supported\n");
+        }
+        reg_indexp++;
+        if ((char *)(reg_indexp + 1) > ((char *)inbuf + PAGE_SIZE)) { /* fix: whole struct must fit before next deref */
+            /*
+             * input list not terminated correctly; bail out.
+             */
+            panic("hv_set_vp_registers:input list not terminated\n");
+            break;
+        }
+    }
+    /*
+     * Now set register state.
+     *
+     * XXXKYS: Is it sufficient to just pause the target vcpu; on the
+     * xen side domain is paused for this call. CHECK.
+ */ + + if (arch_set_info_guest(targetp->xen_vcpu, vcpu_ctx)) { + ret_val = HV_STATUS_INVALID_PARAMETER; + } + if (current->vcpu_id != inbuf->vp_index) { + vcpu_unpause(targetp->xen_vcpu); + } + xfree(vcpu_ctx); + return (ret_val); +} + +static int +hv_switch_va(paddr_t input) +{ + hv_partition_t *curp = hv_get_current_partition(); + hv_vcpu_t *vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()]; + + /* + * XXXKYS: the spec sys the asID is passed via memory at offset 0 of + * the page whose GPA is in the input register. However, it appears + * the current build of longhorn (longhorn-2007-02-06-x86_64-fv-02) + * passes the asID in the input register instead. Need to check if + * future builds do this. + */ + hvm_set_cr3(input); + HV_STATS_COLLECT(HV_CSWITCH, &vcpup->stats); + return (HV_STATUS_SUCCESS); +} + +static int +hv_flush_va(paddr_t input) +{ + hv_partition_t *curp = hv_get_current_partition(); + int i; + hv_vcpu_t *cur_vcpup; + + flush_va_t *flush_argp; + cpumask_t vcpu_mask; + u64 as_id, input_mask, ret_val; + int flush_global = 1; + + cur_vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()]; + flush_argp = cur_vcpup->input_buffer; + + + if (hvm_copy_from_guest_phys(flush_argp, input, sizeof(*flush_argp))) { + return (HV_STATUS_INVALID_ALIGNMENT); + } + input_mask = flush_argp->p_mask; + as_id = flush_argp->as_handle; + cpus_clear(vcpu_mask); + /* + * Deal with all trivial error conditions. 
+ */ + if (flush_argp->flags != 0 && (!(flush_argp->flags & + (HV_FLUSH_ALL_PROCESSORS | + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) { + return (HV_STATUS_INVALID_PARAMETER); + } + if (((flush_argp->p_mask) == 0) && + !(flush_argp->flags & HV_FLUSH_ALL_PROCESSORS)) { + return (HV_STATUS_INVALID_PARAMETER); + } + + if (flush_argp->flags & HV_FLUSH_ALL_PROCESSORS) { + for (i=0; i< MAX_VIRT_CPUS; i++) { + if (current->domain->vcpu[i] != NULL) { + cpu_set(i, vcpu_mask); + } + } + } else { + i = 0; + while (input_mask) { + if (input_mask &0x1) { + cpu_set(i, vcpu_mask); + } + input_mask = (input_mask >> 1); + i++; + } + } + + if (flush_argp->flags & HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) { + as_id = HV_ALL_AS; + } + if (flush_argp->flags & HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) { + flush_global = 0; + } + /* + * Now operate on what we are given + * XXXKYS: For now we are ignoring as_id and fushGlobal flag. + * May have to revisit this. But first stash away the processed + * parameters for subsequent use. 
+ */ + flush_argp->as_handle = as_id; + flush_argp->flags = flush_global; + flush_argp->v_mask = vcpu_mask; + + + ret_val = hv_build_hcall_retval(HV_STATUS_SUCCESS, 0); + hv_set_syscall_retval(guest_cpu_user_regs(), + curp->long_mode_guest, + ret_val); + HV_STATS_COLLECT(HV_FLUSH_VA_STAT, &cur_vcpup->stats); + panic("hv_flush_va not supported\n"); + return (HV_STATUS_SUCCESS); +} + +static int +hv_flush_va_range(paddr_t input, unsigned short start_index, +unsigned short rep_count, unsigned short *reps_done) +{ + hv_vcpu_t *cur_vcpup; + hv_partition_t *curp = hv_get_current_partition(); + flush_va_t *flush_argp; + cpumask_t vcpu_mask; + u64 as_id, input_mask, ret_val; + int flush_global = 1; + int flush_all_proc = 0; + int i; + + cur_vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()]; + flush_argp = cur_vcpup->input_buffer; + ASSERT(rep_count >=1); + ASSERT(((sizeof(*flush_argp)) + 8*(rep_count -1)) <= PAGE_SIZE); + if (hvm_copy_from_guest_phys(flush_argp, input, + ((sizeof(*flush_argp)) + 8*(rep_count -1)))) { + return (HV_STATUS_INVALID_ALIGNMENT); + } + *reps_done = rep_count; + input_mask = flush_argp->p_mask; + as_id = flush_argp->as_handle; + cpus_clear(vcpu_mask); + /* + * Deal with all trivial error conditions. + */ + if (flush_argp->flags != 0 && (!(flush_argp->flags & + (HV_FLUSH_ALL_PROCESSORS | + HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | + HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY)))) { + return (HV_STATUS_INVALID_PARAMETER); + } + if ((flush_argp->p_mask == 0) && + !(flush_argp->flags & HV_FLUSH_ALL_PROCESSORS)) { + return (HV_STATUS_INVALID_PARAMETER); + } + + if (flush_argp->flags & HV_FLUSH_ALL_PROCESSORS) { + flush_all_proc = 1; + for (i=0; i< MAX_VIRT_CPUS; i++) { + if (current->domain->vcpu[i] != NULL) { + cpu_set(i, vcpu_mask); + } + } + } else { + i = 0; + /* + * populate the vcpu mask based on the input. 
+ */ + while (input_mask) { + if (input_mask & 0x1) { + cpu_set(i, vcpu_mask); + } + input_mask = (input_mask >> 1); + i++; + } + } + if (flush_argp->flags & HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES) { + as_id = HV_ALL_AS; + } + if (flush_argp->flags & HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY) { + flush_global = 0; + } + /* + * Now operate on what we are given + * XXXKYS: For now we are ignoring as_id and fushGlobal flag. + * May have to revisit this. + * May have to revisit this. But first stash away the processed + * parameters for subsequent use. + */ + flush_argp->as_handle = as_id; + flush_argp->flags = flush_global; + flush_argp->v_mask = vcpu_mask; + + ret_val = hv_build_hcall_retval(HV_STATUS_SUCCESS, rep_count); + hv_set_syscall_retval(guest_cpu_user_regs(), + curp->long_mode_guest, + ret_val); + + + HV_STATS_COLLECT(HV_FLUSH_RANGE, &cur_vcpup->stats); + panic("hv_flush_vaRange not supported\n"); + return (HV_STATUS_SUCCESS); +} + +void +hv_handle_hypercall(u64 opcode, u64 input, u64 output, + u64 *ret_val) +{ + unsigned short verb; + unsigned short rep_count; + unsigned short reps_done =0; + unsigned short start_index; + hv_partition_t *curp = hv_get_current_partition(); + u64 partition_id; + int value; + + + verb = (short)(opcode & 0xffff); + rep_count = (short)((opcode >>32) & 0xfff); + start_index = (short)((opcode >> 48) & 0xfff); + switch (verb) { + case HV_CREATE_PARTITION: + /* + * Xen only allows dom0 to create domains. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_INITIALIZE_PARTITION: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_DELETE_PARTITION: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_GET_PARTITION_PROPERTY: + /* + * We don't support this. 
+ */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_SET_PARTITION_PROPERTY: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_GET_PARTITION_ID: + if (!hv_privilege_check(curp, HV_ACCESS_PARTITION_ID)) { + *ret_val = + hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + } + partition_id = (u64)current->domain->domain_id; + if (hvm_copy_to_guest_phys(output, + &partition_id, 8)) { + /* + * Invalid output area. + */ + *ret_val = + hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + } + *ret_val = hv_build_hcall_retval(HV_STATUS_SUCCESS, 0); + return; + case HV_GET_NEXT_CHILD_PARTITION: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_GET_LOGICAL_PROCESSOR_RUN_TIME: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_DEPOSIT_MEMORY: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_WITHDRAW_MEMORY: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_GET_MEMORY_BALANCE: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_MAP_GPA_PAGES: + /* + * We don't support this. 
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_UNMAP_GPA_PAGES:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_INSTALL_INTERCEPT:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_CREATE_VP:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_TERMINATE_VP:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_DELETE_VP:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_GET_NEXT_VP:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_GET_VP_REGISTERS:
+        *ret_val = hv_build_hcall_retval(
+            hv_get_vp_registers(input, output), 0);
+        return;
+    case HV_SET_VP_REGISTERS:
+        *ret_val = hv_build_hcall_retval(
+            hv_set_vp_registers(input, output), 0); return; /* fix: was falling through into HV_SWITCH_VA, i.e. hvm_set_cr3(input) */
+    case HV_SWITCH_VA:
+        *ret_val =
+            hv_build_hcall_retval(hv_switch_va(input), 0);
+        return;
+    case HV_FLUSH_VA:
+        *ret_val =
+            hv_build_hcall_retval(hv_flush_va(input), 0);
+        return;
+    case HV_FLUSH_VA_LIST:
+        value = hv_flush_va_range(input, start_index,
+            rep_count, &reps_done);
+        *ret_val = hv_build_hcall_retval(value, reps_done);
+        return;
+
+    case HV_TRASLATE_VA:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_READ_GPA:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_WRITE_GPA:
+        /*
+         * We don't support this.
+         */
+        *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0);
+        return;
+    case HV_ASSERT_VIRTUAL_INTERRUPT:
+        /*
+         * We don't support this.
+ */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_CLEAR_VIRTUAL_INTERRUPT: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_CREATE_PORT: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_DELETE_PORT: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_CONNECT_PORT: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_GET_PORT_PROPERTY: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_DISCONNECT_PORT: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_POST_MESSAGE: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case HV_POST_EVENT: + /* + * We don't support this. + */ + *ret_val = hv_build_hcall_retval(HV_STATUS_ACCESS_DENIED, 0); + return; + case 0: + /* + * 32 bit longhorn invokes hypercall with verb == 0; need to + * check with Mike (XXXKYS). For now ignore it. + */ + *ret_val = + hv_build_hcall_retval(HV_STATUS_INVALID_HYPERCALL_CODE, 0); + return; + default: + printk("Unkown hypercall: verb is: %d\n", verb); + *ret_val = + hv_build_hcall_retval(HV_STATUS_INVALID_HYPERCALL_CODE, 0); + return; + } +} Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_hypercall.h 2008-03-26 13:56:39.000000000 -0400 @@ -0,0 +1,126 @@ +/**************************************************************************** + | + | Copyright (c) [2007, 2008] Novell, Inc. + | All Rights Reserved. 
+ | + | This program is free software; you can redistribute it and/or + | modify it under the terms of version 2 of the GNU General Public License as + | published by the Free Software Foundation. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, contact Novell, Inc. + | + | To contact Novell about this file by physical or electronic mail, + | you may find current contact information at www.novell.com + | + |*************************************************************************** +*/ + +/* + * nshypercall.h + * Memory layouts for the various hypercalls supported. + * + * Engineering Contact: K. Y. Srinivasan + */ + +#ifndef HV_HYPERCALL_H +#define HV_HYPERCALL_H + +#include + + +typedef struct get_vp_registers_input { + u64 partition_id; + u64 vp_index; + u32 reg_index; + u32 pad1; + u64 pad2; +} get_vp_registers_input_t; + +typedef struct get_vp_registers_output { + u64 low_value; + u64 high_value; +} get_vp_registers_output_t; + +typedef struct set_vp_register_spec { + u32 reg_name; + u32 pad; + u64 pad1; + u64 low_value; + u64 high_value; +} set_vp_register_spec_t; + +typedef struct set_vp_registers_input { + u64 partition_id; + u64 vp_index; + set_vp_register_spec_t reg_spec; +} set_vp_registers_input_t; + + +typedef struct flush_va { + u64 as_handle; + u64 flags; + union { + u64 processor_mask; + cpumask_t vcpu_mask; + } proc_mask; +#define p_mask proc_mask.processor_mask +#define v_mask proc_mask.vcpu_mask + u64 gva; +} flush_va_t; + +#define HV_FLUSH_ALL_PROCESSORS 0x00000001 +#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES 0x00000002 +#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY 0x00000004 + +#define HV_ALL_AS (-1) + +/* + * Hypercall verbs. 
+ */ + +#define HV_CREATE_PARTITION 0x0010 +#define HV_INITIALIZE_PARTITION 0x0011 +#define HV_DELETE_PARTITION 0x0014 +#define HV_GET_PARTITION_PROPERTY 0x0017 +#define HV_SET_PARTITION_PROPERTY 0x0018 +#define HV_GET_PARTITION_ID 0x0015 +#define HV_GET_NEXT_CHILD_PARTITION 0x0016 +#define HV_SET_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0005 +#define HV_CLEAR_LOGICAL_PROCESSOR_RUN_TIME_GROUP 0x0006 +#define HV_NOTIFY_LOGICAL_PROCESSOR_POWER_STATE 0x0007 +#define HV_GET_LOGICAL_PROCESSOR_RUN_TIME 0x0004 +#define HV_DEPOSIT_MEMORY 0x001C +#define HV_WITHDRAW_MEMORY 0x001D +#define HV_GET_MEMORY_BALANCE 0x001E +#define HV_MAP_GPA_PAGES 0x001A +#define HV_UNMAP_GPA_PAGES 0x001B +#define HV_INSTALL_INTERCEPT 0x0019 +#define HV_CREATE_VP 0x001F +#define HV_TERMINATE_VP 0x0020 +#define HV_DELETE_VP 0x0021 +#define HV_GET_NEXT_VP 0x0027 +#define HV_GET_VP_REGISTERS 0x0022 +#define HV_SET_VP_REGISTERS 0x0023 +#define HV_SWITCH_VA 0x0001 +#define HV_FLUSH_VA 0x0002 +#define HV_FLUSH_VA_LIST 0x0003 +#define HV_TRASLATE_VA 0x0024 +#define HV_READ_GPA 0x0025 +#define HV_WRITE_GPA 0x0026 +#define HV_ASSERT_VIRTUAL_INTERRUPT 0x002A +#define HV_CLEAR_VIRTUAL_INTERRUPT 0x002C +#define HV_CREATE_PORT 0x002D +#define HV_DELETE_PORT 0x002E +#define HV_CONNECT_PORT 0x002F +#define HV_GET_PORT_PROPERTY 0x0031 +#define HV_DISCONNECT_PORT 0x0030 +#define HV_POST_MESSAGE 0x0032 +#define HV_POST_EVENT 0x0034 + +#endif /* HV_HYPERCALL_H */ Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_intercept.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_intercept.c 2008-03-26 14:26:38.000000000 -0400 @@ -0,0 +1,1695 @@ +/**************************************************************************** + | + | Copyright (c) [2007, 2008] Novell, Inc. + | All Rights Reserved. 
+ | + | This program is free software; you can redistribute it and/or + | modify it under the terms of version 2 of the GNU General Public License as + | published by the Free Software Foundation. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, contact Novell, Inc. + | + | To contact Novell about this file by physical or electronic mail, + | you may find current contact information at www.novell.com + | + |*************************************************************************** +*/ + +/* + * nsintercept.c. + * This file implements the intercepts to support the Hyperv Shim. + * + * Engineering Contact: K. Y. Srinivasan + */ + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +/* + * Local includes; extension specific. + */ +#include "hv_errno.h" +#include "hv_shim.h" + + +/* + * Implement the Hyperv Shim. + */ + +extern struct cpuinfo_x86 boot_cpu_data; +extern struct hvm_mmio_handler vlapic_mmio_handler; + +static inline void +hv_inject_exception(int trap); + +static inline void +hv_hypercall_page_initialize(void *hypercall_page, hv_partition_t *curp); + +static inline void +hv_init_event_page(void *sief_page); + +static inline void +hv_init_message_page(void *sim_page); + +static inline void * +get_virt_from_gmfn(struct domain *d, unsigned long gmfn) +{ + unsigned long mfn = gmfn_to_mfn(d, gmfn); + if (mfn == INVALID_MFN) { + return (NULL); + } + return (map_domain_page_global(mfn)); +} + +static inline void +inject_interrupt(struct vcpu *v, int vector, int type) +{ + struct vlapic *vlapic = vcpu_vlapic(v); + + /* + * XXXKYS: Check the trigger mode. 
+ */ + if (vlapic_set_irq(vlapic, vector, 1)) { + vcpu_kick(v); + } +} + +static inline unsigned long +get_mfn_from_gva(unsigned long va) +{ + uint32_t pfec = PFEC_page_present; + unsigned long gfn; + gfn = paging_gva_to_gfn(current, va, &pfec); + return (gmfn_to_mfn((current->domain), gfn)); +} + +static inline void * +get_virt_from_page_ptr(void *page) +{ + struct page_info *pg = page; + unsigned long mfn = page_to_mfn(pg); + return (map_domain_page_global(mfn)); +} + +static inline void +hv_write_guestid_msr(hv_partition_t *curp, hv_vcpu_t *cur_vcpu, u64 msr_content) +{ + curp->guest_id_msr = msr_content; + if (curp->guest_id_msr == 0) { + /* + * Guest has cleared the guest ID; + * clear the hypercall page. + */ + if (curp->hypercall_msr) { + cur_vcpu->flags &= ~HV_VCPU_UP; + } + } +} + + +static inline void +hv_write_hypercall_msr(hv_partition_t *curp, + hv_vcpu_t *cur_vcpu, + u64 msr_content) +{ + unsigned long gmfn; + void *hypercall_page; + struct domain *d = cur_vcpu->xen_vcpu->domain; + + spin_lock(&curp->lock); + gmfn = (msr_content >> 12); + if (curp->guest_id_msr == 0) { + /* Nothing to do if the guest is not registered*/ + spin_unlock(&curp->lock); + return; + } + /* + * Guest is registered; see if we can turn-on the + * hypercall page. + * XXXKYS: Can the guest write the GPA in one call and + * subsequently enable it? Check. For now assume that all the + * info is specified in one call. + */ + if (((u32)msr_content & (0x00000001)) == 0) { + /* + * The client is not enabling the hypercall; just + * ignore everything. + */ + spin_unlock(&curp->lock); + return; + } + hypercall_page = get_virt_from_gmfn(d,gmfn); + if (hypercall_page == NULL) { + /* + * The guest specified a bogus GPA; inject a GP fault + * into the guest. 
+ */ + hv_inject_exception(TRAP_gp_fault); + spin_unlock(&curp->lock); + return; + } + hv_hypercall_page_initialize(hypercall_page, curp); + curp->hypercall_mfn = gmfn_to_mfn(d, gmfn); +#ifdef CONFIG_DOMAIN_PAGE + unmap_domain_page_global(hypercall_page); +#endif + curp->hypercall_msr = msr_content; + spin_unlock(&curp->lock); + cur_vcpu->flags |= HV_VCPU_UP; +} + + +static inline void hv_write_sx_msr(uint32_t idx, hv_partition_t *curp, + hv_vcpu_t *cur_vcpu, + u64 msr_content) +{ + unsigned long gmfn; + void *sx_page; + struct domain *d = cur_vcpu->xen_vcpu->domain; + gmfn = (msr_content >> 12); + /* + * Can the client enable the siefp and specify + * the base address in two + * different calls? XXXKYS: For now assume + * that it is done in one call. + */ + if (!((u32)msr_content & (0x00000001))) { + /* + * The client is not enabling the sx page; just + * ignore everything. + */ + return; + } + sx_page = get_virt_from_gmfn(d, gmfn); + if (sx_page == NULL) { + /* + * The guest specified a bogus GPA; inject a GP fault + * into the guest. + */ + hv_inject_exception(TRAP_gp_fault); + return; + } + switch (idx) { + case HV_MSR_SIEFP: + hv_init_event_page(sx_page); + cur_vcpu->siefp_msr = msr_content; + cur_vcpu->sief_page = sx_page; + break; + case HV_MSR_SIMP: + hv_init_message_page(sx_page); + cur_vcpu->simp_msr = msr_content; + cur_vcpu->sim_page = sx_page; + break; + } + +} + +int +hyperv_initialize(struct domain *d) +{ + int i; + printk("Hyperv extensions initialized\n"); + if (hyperv_dom_create(d)) { + printk("Hyperv dom create failed\n"); + return (1); + } + for (i=0; i < MAX_VIRT_CPUS; i++) { + if (d->vcpu[i] != NULL) { + if (hyperv_vcpu_initialize(d->vcpu[i])) { + int j; + for (j= (i-1); j >=0; j--) { + hyperv_vcpu_destroy(d->vcpu[j]); + } + hyperv_dom_destroy(d); + return (1); + } + } + } + return (0); +} + +/* + * Time this domain booted. 
+ */ +s_time_t hv_domain_boot_time; + + +static inline u64 +hv_get_time_since_boot(void) +{ + u64 curTime = get_s_time(); + return ((curTime - hv_domain_boot_time)/100) ; +} + +static inline int +hv_call_from_bios(struct cpu_user_regs *regs) +{ + if (hvm_paging_enabled(current)) { + return (0); + } else { + return (1); + } +} + + +static inline void +hv_inject_exception(int trap) +{ + hvm_funcs.inject_exception(trap, 0, 0); +} + + +static inline int +hv_os_registered(void) +{ + hv_partition_t *curp = hv_get_current_partition(); + return (curp->guest_id_msr != 0?1:0); +} + + + +static inline void +hv_set_partition_privileges(hv_partition_t *hvpp) +{ + /* + * This is based on the hypervisor spec under section 5.2.3. + */ + hvpp->privileges = 0x000000020000007f; +} + +static inline u32 +hv_get_recommendations(void) +{ + /* + *For now we recommend all the features. Need to validate. + */ + if ( paging_mode_hap(current->domain)) { + /* + * If HAP is enabled; the guest should not use TLB flush + * related enlightenments. + */ + return (0x19); + } else { + /* + * For now disable TLB flush enlightenments. + */ + return (0x19); + } +} + + +static inline void +hv_set_partition_features(hv_partition_t *hvpp) +{ + hvpp->supported_features = 0x1f; +} + +static inline u16 +hv_get_guest_major(void) +{ + //KYS: Check! + return (0); +} +static inline u16 +hv_get_guest_minor(void) +{ + //KYS: Check! + return (0); +} +static inline u32 +hv_get_guest_service_pack(void) +{ + //KYS: Check! + return (0); +} + +static inline u8 +hv_get_guest_service_branch_info(void) +{ + //KYS: Check! + return (0); +} +static inline u32 +hv_get_guest_service_number(void) +{ + //KYS: Check! + return (0); +} + +static inline u32 +hv_get_supported_synthetic_msrs(void) +{ + /* + * All MSRS in the spec version 0.83 including RESET MSR. 
+ */ + return (0xff); +} + + +static inline u32 +hv_get_max_vcpus_supported(void) +{ + return MAX_VIRT_CPUS; +} + +static inline u32 +hv_get_max_lcpus_supported(void) +{ + return NR_CPUS; +} + + +static inline void +hv_read_icr(u64 *icr_content) +{ + u32 icr_low, icr_high; + u64 ret_val; + + + icr_low = vlapic_mmio_handler.read_handler(current, + (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4); + icr_high = vlapic_mmio_handler.read_handler(current, + (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4); + ret_val = icr_high; + *icr_content = ((ret_val << 32) | icr_low); + +} + +static inline void +hv_read_tpr(u64 *tpr_content) +{ + u32 tpr_low; + + + tpr_low = vlapic_mmio_handler.read_handler(current, + (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4); + *tpr_content = (u64)tpr_low; + +} + +static inline void +hv_write_eoi(u64 msr_content) +{ + u32 eoi = (u32)msr_content; + + vlapic_mmio_handler.write_handler(current, + (vlapic_base_address(vcpu_vlapic(current)) + 0xb0), 4, eoi); + +} + +static inline void +hv_write_icr(u64 msr_content) +{ + u32 icr_low, icr_high; + icr_low = (u32)msr_content; + icr_high = (u32)(msr_content >> 32); + + if (icr_high != 0) { + vlapic_mmio_handler.write_handler(current, + (vlapic_base_address(vcpu_vlapic(current)) + 0x310), 4, + icr_high); + } + if (icr_low != 0) { + vlapic_mmio_handler.write_handler(current, + (vlapic_base_address(vcpu_vlapic(current)) + 0x300), 4, + icr_low); + } + +} + +static inline void +hv_write_tpr(u64 msr_content) +{ + u32 tpr = (u32)msr_content; + + + vlapic_mmio_handler.write_handler(current, + (vlapic_base_address(vcpu_vlapic(current)) + 0x80), 4, tpr); + +} + +static inline void +hv_hypercall_page_initialize(void *hypercall_page, hv_partition_t *curp) +{ + char *p; + + if (hvm_funcs.guest_x86_mode(current) == 8) { + curp->long_mode_guest = 1; + } else { + curp->long_mode_guest = 0; + } + + memset(hypercall_page, 0, PAGE_SIZE); + p = (char *)(hypercall_page) ; + *(u8 *)(p + 0) = 0x0f; /* 
vmcall */ + *(u8 *)(p + 1) = 0x01; + if (boot_cpu_data.x86_vendor == 0) { + *(u8 *)(p + 2) = 0xc1; + } else { + *(u8 *)(p + 2) = 0xd9; + } + *(u8 *)(p + 3) = 0xc3; /* ret */ +} + +static inline void +hv_init_event_page(void *sief_page) +{ + memset(sief_page, 0, PAGE_SIZE); +} + +static inline void +hv_init_message_page(void *sim_page) +{ + memset(sim_page, 0, PAGE_SIZE); +} + + +static inline void +hv_process_message_q(hv_partition_t *curp, hv_vcpu_t *cur_vcpu) +{ + /* + * XXXKYS: we currently do not support queued messages. + */ +} + +static inline void +hv_schedule_time_out(hv_vcpu_timer_state_t *timer) +{ + /* + * We maintain the count in the units of 100ns. Furthermore, + * this is not relative to NOW() but rather absolute. + */ + set_timer(&timer->vcpu_timer, (timer->count * 100)); +} + +static void +hv_timeout_handler(void *arg) +{ + hv_vcpu_timer_state_t *timer_data = arg; + hv_vcpu_t *cur_vcpu = timer_data->this_cpu; + int int_num; + int vector; + if (!(cur_vcpu->control_msr & 0x9)) { + goto timeout_postprocess; + } + /* + * SynIC is enabled; do further processing. Timeouts are posted as + * messages; verify if the message page is enabled. + */ + if (!(cur_vcpu->simp_msr & 0x1)) { + goto timeout_postprocess; + } + int_num = (((u32)(timer_data->config >> 16)) & 0x0000000f); + /* + * First post the message and then optionally deal with the + * interrupt notification. + */ + if (cur_vcpu->sim_page == NULL) { + panic("Novell Shim: Sim page not setup\n"); + } + if ((((hv_message_t *)cur_vcpu->sim_page)[int_num]).type != + TYPE_NONE) { + /* + * The message slot is not empty just silently return. + */ + goto timeout_postprocess; + } + /* + * The slot is available; post the message. 
+ */ + (((hv_timer_message_t *)cur_vcpu->sim_page)[int_num]).type = + TYPE_TIMER_EXPIRED; + (((hv_timer_message_t *)cur_vcpu->sim_page)[int_num]).size = + sizeof(hv_timer_message_t); + (((hv_timer_message_t *)cur_vcpu->sim_page)[int_num]).timer_index = + timer_data->timer_index; + (((hv_timer_message_t *)cur_vcpu->sim_page)[int_num]).expiration_time = + timer_data->count; + if ((cur_vcpu->int_msr[int_num] >> 16) &0x1) { + /* + * The designated sintx register is masked; just return. + */ + goto timeout_postprocess; + } + vector = ((u32)cur_vcpu->int_msr[int_num] &0xff); + + /* + * Now post the interrupt to the VCPU. + * XXXKYS: What is the delivery mode for interrupts delivered here. + * Check with Mike? + */ + inject_interrupt(current, vector, APIC_DM_FIXED); + + /* + * If auto eoi is set; deal with that. + */ + if (((u32)(cur_vcpu->int_msr[int_num] >> 16)) & 0x1) { + hv_write_eoi(0); + } + +timeout_postprocess: + /* + * Prior to returning, deal with all the post timeout issues. + */ + if (((u32)(timer_data->config)) & 0x00000002) { + HV_STATS_COLLECT(HV_TIMEOUTS, &cur_vcpu->stats); + hv_schedule_time_out(timer_data); + } +} + +static inline void +hv_timer_init(hv_vcpu_t *vcpup, int timer) +{ + vcpup->timers[timer].config = 0; + vcpup->timers[timer].count = 0; + vcpup->timers[timer].this_cpu = vcpup; + vcpup->timers[timer].timer_index = timer; + init_timer(&vcpup->timers[timer].vcpu_timer, hv_timeout_handler, + &vcpup->timers[timer], current->processor); +} + +static inline int +hv_access_time_refcnt(hv_partition_t *curp, u64 *msr_content) +{ + if (!hv_privilege_check(curp, HV_ACCESS_TIME_REF_CNT)) { + /* + * The partition does not have the privilege to + * read this; return error. 
+ */ + return (0); + } + *msr_content = hv_get_time_since_boot(); + return (1); +} + +void +hyperv_do_migrate_timers(struct vcpu *v) +{ + hv_partition_t *curp = hv_get_current_partition(); + hv_vcpu_t *vcpup; + int i; + vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()]; + + for (i=0; i<4; i++) { + migrate_timer(&vcpup->timers[i].vcpu_timer, v->processor); + } +} + + +void +hyperv_vcpu_up(struct vcpu *v) +{ + hv_partition_t *curp = hv_get_current_partition(); + hv_vcpu_t *vcpup; + vcpup = &curp->vcpu_state[v->vcpu_id]; + vcpup->flags |= HV_VCPU_UP; +} + +int +hyperv_do_hypercall(struct cpu_user_regs *pregs) +{ + hv_partition_t *curp = hv_get_current_partition(); + hv_vcpu_t *vcpup; + int long_mode_guest = curp->long_mode_guest; + unsigned long hypercall_mfn; + unsigned long gmfn; + gmfn = (curp->hypercall_msr >> 12); + + hypercall_mfn = get_mfn_from_gva(pregs->eip); + + if (hypercall_mfn == curp->hypercall_mfn) { + u64 opcode, input, output, ret_val; + vcpup = &curp->vcpu_state[hv_get_current_vcpu_index()]; + + /* + * This is an extension hypercall; process it; but first make + * sure that the CPU is in the right state for invoking + * the hypercall - protected mode at CPL 0. + */ + if (hv_invalid_cpu_state()) { + hv_inject_exception(TRAP_gp_fault); + ret_val = hv_build_hcall_retval( + HV_STATUS_INVALID_VP_STATE, 0); + hv_set_syscall_retval(pregs, long_mode_guest, ret_val); + return (1); + } + if (long_mode_guest) { + opcode = pregs->ecx; + input = pregs->edx; + output = pregs->r8; + } else { + opcode = + ((((u64)pregs->edx) << 32) | ((u64)pregs->eax)); + input = + ((((u64)pregs->ebx) << 32) | ((u64)pregs->ecx)); + output = + ((((u64)pregs->edi) << 32) | ((u64)pregs->esi)); + } + ASSERT(vcpup->nsVcplockDepth == 0); + hv_handle_hypercall(opcode, input, output, &ret_val); + hv_set_syscall_retval(pregs, long_mode_guest, ret_val); + return (1); + } + /* + * This hypercall page is not the page for the Veridian extension. 
+ */ + return (0); +} + + +int +hyperv_dom_create(struct domain *d) +{ + hv_partition_t *hvpp; + hvpp = xmalloc_bytes(sizeof(hv_partition_t)); + if (hvpp == NULL) { + printk("Hyprv Dom Create: Memory allocation failed\n"); + return (1); + } + memset(hvpp, 0, sizeof(*hvpp)); + spin_lock_init(&hvpp->lock); + /* + * Set the partition wide privilege; We can start with no privileges + * and progressively turn on fancier hypervisor features. + */ + hv_set_partition_privileges(hvpp); + hv_set_partition_features(hvpp); + /* + * Stash away pointer to our state in the hvm domain structure. + */ + d->arch.hvm_domain.hyperv_handle = hvpp; + hv_domain_boot_time = get_s_time(); + return (0); +} + +void +hyperv_dom_destroy(struct domain *d) +{ + int i; + hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle; + printk("Hyper-V Domain Being Destroyed\n"); + ASSERT(curp != NULL); +#ifdef HV_STATS + printk("DUMP STATS\n"); + printk("GFS cpucount is %d\n", curp->flush_state.count); + if (curp->flush_state.owner != NULL) { + printk("GFS owner is %d\n", curp->flush_state.owner->vcpu_id); + } else { + printk("GFS is free\n"); + } + if (!cpus_empty(curp->flush_state.waiters)) { + printk("GFS: waiters not empty\n"); + } else { + printk("GFS: waiters empty\n"); + } + for (i=0; i < MAX_VIRT_CPUS; i++) { + if (d->vcpu[i] != NULL) { + hv_print_stats(curp, i); + } + } +#endif //HV_STATS + + xfree(d->arch.hvm_domain.hyperv_handle); + d->arch.hvm_domain.hyperv_handle = NULL; +} + +int +hyperv_vcpu_initialize(struct vcpu *v) +{ + hv_vcpu_t *vcpup; + hv_partition_t *curp = v->domain->arch.hvm_domain.hyperv_handle; + int i; + vcpup = &curp->vcpu_state[v->vcpu_id]; + atomic_inc(&curp->vcpus_active); + if (v->vcpu_id == 0) { + vcpup->flags |= HV_VCPU_BOOT_CPU; + } + /* + * Initialize all the synthetic MSRs corresponding to this VCPU. + * Note that all state is set to 0 to begin + * with. + */ + vcpup->version_msr = 0x00000001; + /* + * Initialize the synthetic timet structures. 
+ */ + for (i=0; i < 4; i++) { + hv_timer_init(vcpup, i); + } + /* + * Setup the input page for handling hypercalls. + * + */ + vcpup->input_buffer_page = + alloc_domheap_page(NULL); + if (vcpup->input_buffer_page == NULL) { + printk("Hyperv vcpu init: Memory allocation failed\n"); + return (1); + } + vcpup->input_buffer = + get_virt_from_page_ptr(vcpup->input_buffer_page); + if (vcpup->input_buffer == NULL) { + printk("Hyperv vcpu init: Coud not get VA\n"); + free_domheap_pages(vcpup->input_buffer_page, 0); + return (1); + } + memset(vcpup->input_buffer, 0, PAGE_SIZE); + vcpup->output_buffer_page = + alloc_domheap_page(NULL); + + if (vcpup->output_buffer_page == NULL) { + printk("Hyperv vcpu init: Memory allocation failed\n"); +#ifdef CONFIG_DOMAIN_PAGE + unmap_domain_page_global(vcpup->input_buffer); +#endif + free_domheap_pages(vcpup->input_buffer_page, 0); + return (1); + } + vcpup->output_buffer = + get_virt_from_page_ptr(vcpup->output_buffer_page); + if (vcpup->output_buffer == NULL) { + printk("Hyperv vcpu init: Coud not get VA\n"); + free_domheap_pages(vcpup->output_buffer_page, 0); +#ifdef CONFIG_DOMAIN_PAGE + unmap_domain_page_global(vcpup->input_buffer); +#endif + free_domheap_pages(vcpup->input_buffer_page, 0); + return (1); + } + vcpup->xen_vcpu = v; + + return (0); +} + +void +hyperv_vcpu_destroy(struct vcpu *v) +{ + hv_vcpu_t *vcpup; + hv_partition_t *curp = v->domain->arch.hvm_domain.hyperv_handle; + int i; + + vcpup = &curp->vcpu_state[v->vcpu_id]; + atomic_dec(&curp->vcpus_active); + vcpup->flags &= ~HV_VCPU_UP; + /* + * Get rid of the pages we have allocated for this VCPU. 
+ */ +#ifdef CONFIG_DOMAIN_PAGE + unmap_domain_page_global(vcpup->sief_page); + unmap_domain_page_global(vcpup->sim_page); + unmap_domain_page_global(vcpup->input_buffer); + unmap_domain_page_global(vcpup->output_buffer); +#endif + + free_domheap_pages(vcpup->input_buffer_page, 0); + free_domheap_pages(vcpup->output_buffer_page, 0); + /* + * Kill the timers + */ + for (i=0; i < 4; i++) { + kill_timer(&vcpup->timers[i].vcpu_timer); + } + return; +} + +static int +hyperv_vcpu_save(struct domain *d, hvm_domain_context_t *h) +{ + struct vcpu *v; + struct hvm_hyperv_cpu ctxt; + + hv_vcpu_t *vcpup; + hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle; + int i; + + if (curp == NULL) { + return 0; + } + for_each_vcpu(d, v) { + vcpup = &curp->vcpu_state[v->vcpu_id]; + + /* + * We don't need to save state for a + * vcpu that is down; the restore + * code will leave it down if there is nothing saved. + */ + if ( test_bit(_VPF_down, &v->pause_flags) ) + continue; + ctxt.control_msr = vcpup->control_msr; + ctxt.version_msr = vcpup->version_msr; + ctxt.sief_msr = vcpup->siefp_msr; + ctxt.simp_msr = vcpup->simp_msr; + ctxt.eom_msr = vcpup->eom_msr; + for (i=0; i < 16; i++) + ctxt.int_msr[i] = vcpup->int_msr[i]; + for (i=0; i < 4; i++) { + ctxt.timers[i].config = vcpup->timers[i].config; + /* + * Save the count in units of 100ns relative to NOW() + * When we restore we will add NOW() to properly + * account for the elapsed time when the timer was + * active. 
+ */ + if (vcpup->timers[i].count > ((NOW())/100)) { + ctxt.timers[i].count = + (vcpup->timers[i].count - ((NOW())/100)); + } else { + ctxt.timers[i].count = 0; + } + } + if ( hvm_save_entry(HYPERV_CPU, + v->vcpu_id, h, &ctxt) != 0 ) + return 1; + } + + return 0; +} + +static int +hyperv_vcpu_restore(struct domain *d, hvm_domain_context_t *h) +{ + int vcpuid, i; + struct hvm_hyperv_cpu ctxt; + + hv_vcpu_t *vcpup; + hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle; + + if (curp == NULL) { + return 0; + } + /* Which vcpu is this? */ + vcpuid = hvm_load_instance(h); + vcpup = &curp->vcpu_state[vcpuid]; + ASSERT(vcpup != NULL); + if ( hvm_load_entry(HYPERV_CPU, h, &ctxt) != 0 ) + return -22; + + vcpup->control_msr = ctxt.control_msr; + vcpup->version_msr = ctxt.version_msr; + + hv_write_sx_msr(HV_MSR_SIEFP, curp, vcpup, ctxt.sief_msr); + hv_write_sx_msr(HV_MSR_SIMP, curp, vcpup, ctxt.simp_msr); + + vcpup->eom_msr = ctxt.eom_msr; + for (i=0; i<16; i++) + vcpup->int_msr[i] = ctxt.int_msr[i]; + for (i=0; i < 4; i++) { + vcpup->timers[i].config = ctxt.timers[i].config; + vcpup->timers[i].count = + (ctxt.timers[i].count + ((NOW())/100)); + if ((vcpup->timers[i].config | 0x9)) { + /* + * XXXKYS: Some issues with regards to time + * management here: + * 1) We will ignore the elapsed wall clock time + * when the domain was not running. + * 2) Clearly we should account fot the time that + * has elapsed when the domain was running with + * respect to the timeouts that were scheduled + * prior to saving the domain. + * We will deal with on the save side. 
+ */ + hv_schedule_time_out(&vcpup->timers[i]); + HV_STATS_COLLECT(HV_TIMEOUTS, &vcpup->stats); + } + } + + vcpup->flags |= HV_VCPU_UP; + return 0; +} + + + + +static int +hyperv_dom_save(struct domain *d, hvm_domain_context_t *h) +{ + struct hvm_hyperv_dom ctxt; + hv_partition_t *curp = d->arch.hvm_domain.hyperv_handle; + + if (curp == NULL) { + return 0; + } + + ctxt.guestid_msr = curp->guest_id_msr; + ctxt.hypercall_msr = curp->hypercall_msr; + ctxt.long_mode = curp->long_mode_guest; + ctxt.ext_id = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; + return (hvm_save_entry(HYPERV_DOM, 0, h, &ctxt)); +} + +static int +hyperv_dom_restore(struct domain *d, hvm_domain_context_t *h) +{ + struct hvm_hyperv_dom ctxt; + hv_partition_t *curp; + + if ( hvm_load_entry(HYPERV_DOM, h, &ctxt) != 0 ) + return -22; + d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR] = ctxt.ext_id; + if ((ctxt.ext_id == 0) || (ctxt.ext_id > 1)) { + return 0; + } + if (hyperv_initialize(d)) { + return -22; + } + curp = d->arch.hvm_domain.hyperv_handle; + + curp->guest_id_msr = ctxt.guestid_msr; + curp->hypercall_msr = ctxt.hypercall_msr; + curp->long_mode_guest = ctxt.long_mode; + curp->hypercall_mfn = + gmfn_to_mfn(d, (ctxt.hypercall_msr >> 12)); + + return 0; +} + +HVM_REGISTER_SAVE_RESTORE(HYPERV_DOM, hyperv_dom_save, hyperv_dom_restore, + 1, HVMSR_PER_DOM); + + +HVM_REGISTER_SAVE_RESTORE(HYPERV_CPU,hyperv_vcpu_save,hyperv_vcpu_restore, + 1, HVMSR_PER_VCPU); + + +static int +hv_preprocess_cpuid_leaves(unsigned int input, struct cpu_user_regs *regs) +{ + uint32_t idx; + struct domain *d = current->domain; + int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; + + if (extid == 1) { + /* + * Enlightened Windows guest; need to remap and handle + * leaves used by PV front-end drivers. + */ + if ((input >= 0x40000000) && (input <= 0x40000005)) { + return (0); + } + /* + * PV drivers use cpuid to query the hypervisor for details. 
On + * Windows we will use the following leaves for this: + * + * 4096: VMM Sinature (corresponds to 0x40000000 on Linux) + * 4097: VMM Version (corresponds to 0x40000001 on Linux) + * 4098: Hypercall details (corresponds to 0x40000002 on Linux) + */ + if ((input >= 0x40001000) && (input <= 0x40001002)) { + idx = (input - 0x40001000); + switch (idx) { + case 0: + regs->eax = 0x40000002; /* Largest leaf */ + regs->ebx = 0x566e6558;/*Signature 1: "XenV" */ + regs->ecx = 0x65584d4d; /*Signature 2: "MMXe" */ + regs->edx = 0x4d4d566e; /*Signature 3: "nVMM"*/ + break; + case 1: + regs->eax = + (XEN_VERSION << 16) | + XEN_SUBVERSION; + regs->ebx = 0; /* Reserved */ + regs->ecx = 0; /* Reserved */ + regs->edx = 0; /* Reserved */ + break; + + case 2: + regs->eax = 1; /*Number of hypercall-transfer pages*/ + /*In linux this is 0x40000000 */ + regs->ebx = 0x40001000; /* MSR base address */ + regs->ecx = 0; /* Features 1 */ + regs->edx = 0; /* Features 2 */ + break; + } + } + return (1); + } else { + /* + * For now this is all other "enlightened guests" + */ + if ((input >= 0x40000000) && (input <= 0x40000002)) { + /* + * These leaves have already been correctly + * processed; just return. + */ + return (1); + } + return (0); + } +} + +int +hyperv_do_cpu_id(unsigned int input, struct cpu_user_regs *regs) +{ + uint32_t idx; + + /* + * hvmloader uses cpuid to set up a hypercall page; we don't want to + * intercept calls coming from the bootstrap (bios) code in the HVM + * guest; we discriminate based on the instruction pointer. + */ + if (hv_call_from_bios(regs)) { + /* + * We don't intercept this. + */ + return (0); + } + + if (input == 0x00000001) { + regs->ecx = (regs->ecx | 0x80000000); + return (1); + } + + if (hv_preprocess_cpuid_leaves(input, regs)) { + return (0); + } + idx = (input - 0x40000000); + + switch (idx) { + case 0: + /* + * 0x40000000: Hypervisor identification. 
+ */ + regs->eax = 0x40000005; /* For now clamp this */ + regs->ebx = 0x65766f4e; /* "Nove" */ + regs->ecx = 0x68536c6c; /* "llSh" */ + regs->edx = 0x76486d69; /* "imHv" */ + break; + + case 1: + /* + * 0x40000001: Hypervisor identification. + */ + regs->eax = 0x31237648; /* "Hv#1*/ + regs->ebx = 0; /* Reserved */ + regs->ecx = 0; /* Reserved */ + regs->edx = 0; /* Reserved */ + break; + case 2: + /* + * 0x40000002: Guest Info + */ + if (hv_os_registered()) { + regs->eax = hv_get_guest_major(); + regs->ebx = + (hv_get_guest_major() << 16) | hv_get_guest_minor(); + regs->ecx = hv_get_guest_service_pack(); + regs->edx = + (hv_get_guest_service_branch_info() << 24) | + hv_get_guest_service_number(); + } else { + regs->eax = 0; + regs->ebx = 0; + regs->ecx = 0; + regs->edx = 0; + } + break; + case 3: + /* + * 0x40000003: Feature identification. + */ + regs->eax = hv_get_supported_synthetic_msrs(); + /* We only support AcessSelfPartitionId bit 1 */ + regs->ebx = 0x2; + regs->ecx = 0; /* Reserved */ + regs->edx = 0; /*No MWAIT (bit 0), No debugging (bit 1)*/ + break; + case 4: + /* + * 0x40000004: Imlementation recommendations. + */ + regs->eax = hv_get_recommendations(); + regs->ebx = 0; /* Reserved */ + regs->ecx = 0; /* Reserved */ + regs->edx = 0; /* Reserved */ + break; + case 5: + /* + * 0x40000005: Implementation limits. + * Currently we retrieve maximum number of vcpus and + * logical processors (hardware threads) supported. + */ + regs->eax = hv_get_max_vcpus_supported(); + regs->ebx = hv_get_max_lcpus_supported(); + regs->ecx = 0; /* Reserved */ + regs->edx = 0; /* Reserved */ + break; + + default: + /* + * We don't handle this leaf. 
+ */ + return (0); + + } + return (1); +} + +int +hyperv_do_rd_msr(uint32_t idx, struct cpu_user_regs *regs) +{ + hv_partition_t *curp = hv_get_current_partition(); + unsigned int vcp_index = hv_get_current_vcpu_index(); + u64 msr_content = 0; + hv_vcpu_t *cur_vcpu = &curp->vcpu_state[vcp_index]; + int syn_int, timer; + struct domain *d = current->domain; + int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; + u64 timer_count; + + /* + * hvmloader uses rdmsr; we don't want to + * intercept calls coming from the bootstrap (bios) code in the HVM + * guest; we descriminate based on the instruction pointer. + */ + if (hv_call_from_bios(regs)) { + /* + * We don't intercept this. + */ + return (0); + } + if (extid > 1) { + /* + * For now this is all other "Enlightened" operating systems + * other than Longhorn. + */ + if (idx == 0x40000000) { + /* + * PV driver hypercall setup. Let xen handle this. + */ + return (0); + } + if (idx == 0x40001000) { + idx = 0x40000000; + } + } + switch (idx) { + case HV_MSR_GUEST_OS_ID: + spin_lock(&curp->lock); + regs->eax = (u32)(curp->guest_id_msr & 0xFFFFFFFF); + regs->edx = (u32)(curp->guest_id_msr >> 32); + spin_unlock(&curp->lock); + break; + case HV_MSR_HYPERCALL: + spin_lock(&curp->lock); + regs->eax = (u32)(curp->hypercall_msr & 0xFFFFFFFF); + regs->edx = (u32)(curp->hypercall_msr >> 32); + spin_unlock(&curp->lock); + if ((((u32)curp->hypercall_msr) & (0x00000001)) != 0) { + cur_vcpu->flags |= HV_VCPU_UP; + } + break; + case HV_MSR_VP_INDEX: + regs->eax = (u32)(vcp_index); + regs->edx = (u32)(0x0); + break; + case HV_MSR_ICR: + if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) { + goto msr_read_error; + } + hv_read_icr(&msr_content); + HV_STATS_COLLECT(HV_ICR_READ, &cur_vcpu->stats); + regs->eax = (u32)(msr_content & 0xFFFFFFFF); + regs->edx = (u32)(msr_content >> 32); + break; + case HV_MSR_TPR: + if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) { + goto msr_read_error; + } + hv_read_tpr(&msr_content); + 
HV_STATS_COLLECT(HV_TPR_READ, &cur_vcpu->stats); + regs->eax = (u32)(msr_content & 0xFFFFFFFF); + regs->edx = (u32)(msr_content >> 32); + break; + /* + * The following synthetic MSRs are implemented in the Novell Shim. + */ + case HV_MSR_SCONTROL: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_read_error; + } + regs->eax = (u32)(cur_vcpu->control_msr & 0xFFFFFFFF); + regs->edx = (u32)(cur_vcpu->control_msr >> 32); + break; + case HV_MSR_SVERSION: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_read_error; + } + regs->eax = (u32)(cur_vcpu->version_msr & 0xFFFFFFFF); + regs->edx = (u32)(cur_vcpu->version_msr >> 32); + break; + case HV_MSR_SIEFP: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_read_error; + } + regs->eax = (u32)(cur_vcpu->siefp_msr & 0xFFFFFFFF); + regs->edx = (u32)(cur_vcpu->siefp_msr >> 32); + break; + case HV_MSR_SIMP: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_read_error; + } + regs->eax = (u32)(cur_vcpu->simp_msr & 0xFFFFFFFF); + regs->edx = (u32)(cur_vcpu->simp_msr >> 32); + break; + case HV_MSR_SINT0: + syn_int = 0; + goto syn_int_read_process; + case HV_MSR_SINT1: + syn_int = 1; + goto syn_int_read_process; + case HV_MSR_SINT2: + syn_int = 2; + goto syn_int_read_process; + case HV_MSR_SINT3: + syn_int = 3; + goto syn_int_read_process; + case HV_MSR_SINT4: + syn_int = 4; + goto syn_int_read_process; + case HV_MSR_SINT5: + syn_int = 5; + goto syn_int_read_process; + case HV_MSR_SINT6: + syn_int = 6; + goto syn_int_read_process; + case HV_MSR_SINT7: + syn_int = 7; + goto syn_int_read_process; + case HV_MSR_SINT8: + syn_int = 8; + goto syn_int_read_process; + case HV_MSR_SINT9: + syn_int = 9; + goto syn_int_read_process; + case HV_MSR_SINT10: + syn_int = 10; + goto syn_int_read_process; + case HV_MSR_SINT11: + syn_int = 11; + goto syn_int_read_process; + case HV_MSR_SINT12: + syn_int = 12; + goto syn_int_read_process; + case HV_MSR_SINT13: + syn_int = 13; + goto 
syn_int_read_process; + case HV_MSR_SINT14: + syn_int = 14; + goto syn_int_read_process; + case HV_MSR_SINT15: + syn_int = 15; +syn_int_read_process: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_read_error; + } + regs->eax = (u32)(cur_vcpu->int_msr[syn_int] & 0xFFFFFFFF); + regs->edx = (u32)(cur_vcpu->int_msr[syn_int] >> 32); + break; + + case HV_MSR_SEOM: + /* + * This is a write only register; reads return 0. + */ + regs->eax = 0; + regs->edx = 0; + break; + case HV_MSR_TIME_REF_COUNT: + if (!hv_access_time_refcnt(curp, &msr_content)) { + goto msr_read_error; + } + regs->eax = (u32)(msr_content & 0xFFFFFFFF); + regs->edx = (u32)(msr_content >> 32); + break; + /* + * Synthetic timer MSRs. + */ + case HV_MSR_TIMER0_CONFIG: + timer = 0; + goto process_timer_config_read; + case HV_MSR_TIMER1_CONFIG: + timer = 1; + goto process_timer_config_read; + case HV_MSR_TIMER2_CONFIG: + timer = 2; + goto process_timer_config_read; + case HV_MSR_TIMER3_CONFIG: + timer = 3; +process_timer_config_read: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_TIMERS)) { + goto msr_read_error; + } + regs->eax = + (u32)(cur_vcpu->timers[timer].config & 0xFFFFFFFF); + regs->edx = + (u32)(cur_vcpu->timers[timer].config >> 32); + break; + case HV_MSR_TIMER0_COUNT: + timer = 0; + goto process_timer_count_read; + case HV_MSR_TIMER1_COUNT: + timer = 1; + goto process_timer_count_read; + case HV_MSR_TIMER2_COUNT: + timer = 2; + goto process_timer_count_read; + case HV_MSR_TIMER3_COUNT: + timer = 3; +process_timer_count_read: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_TIMERS)) { + goto msr_read_error; + } + timer_count = cur_vcpu->timers[timer].count; + if (timer_count > ((NOW())/100)) { + timer_count -= ((NOW())/100); + } else { + timer_count = 0; + } + regs->eax = + (u32)(timer_count & 0xFFFFFFFF); + regs->edx = + (u32)(timer_count >> 32); + break; + case HV_MSR_PVDRV_HCALL: + regs->eax = 0; + regs->edx = 0; + break; + case HV_MSR_SYSTEM_RESET: + regs->eax = 0; + regs->edx 
= 0; + break; + default: + /* + * We did not handle the MSR address specified; + * let the caller figure out + * What to do. + */ + return (0); + } + return (1); +msr_read_error: + /* + * Have to inject #GP fault. + */ + hv_inject_exception(TRAP_gp_fault); + return (1); +} + +int +hyperv_do_wr_msr(uint32_t idx, struct cpu_user_regs *regs) +{ + hv_partition_t *curp = hv_get_current_partition(); + unsigned int vcp_index = hv_get_current_vcpu_index(); + u64 msr_content = 0; + hv_vcpu_t *cur_vcpu = &curp->vcpu_state[vcp_index]; + int syn_int, timer; + struct domain *d = current->domain; + int extid = d->arch.hvm_domain.params[HVM_PARAM_EXTEND_HYPERVISOR]; + + /* + * hvmloader uses wrmsr; we don't want to + * intercept calls coming from the bootstrap (bios) code in the HVM + * guest; we descriminate based on the instruction pointer. + */ + if (hv_call_from_bios(regs)) { + /* + * We don't intercept this. + */ + return (0); + } + msr_content = + (u32)regs->eax | ((u64)regs->edx << 32); + if (extid > 1) { + /* + * For now this is all other "Enlightened" operating systems + * other than Longhorn. + */ + if (idx == 0x40000000) { + /* + * PV driver hypercall setup. Let xen handle this. 
+ */ + return (0); + } + if (idx == 0x40001000) { + idx = 0x40000000; + } + } + + switch (idx) { + case HV_MSR_GUEST_OS_ID: + hv_write_guestid_msr(curp, cur_vcpu, msr_content); + break; + case HV_MSR_HYPERCALL: + hv_write_hypercall_msr(curp, cur_vcpu, msr_content); + break; + + case HV_MSR_VP_INDEX: + goto msr_write_error; + + case HV_MSR_EOI: + if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) { + goto msr_write_error; + } + hv_write_eoi(msr_content); + HV_STATS_COLLECT(HV_EOI_WRITE, &cur_vcpu->stats); + break; + case HV_MSR_ICR: + if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) { + goto msr_write_error; + } + hv_write_icr(msr_content); + HV_STATS_COLLECT(HV_ICR_WRITE, &cur_vcpu->stats); + break; + case HV_MSR_TPR: + if (!hv_privilege_check(curp, HV_ACCESS_APIC_MSRS)) { + goto msr_write_error; + } + hv_write_tpr(msr_content); + HV_STATS_COLLECT(HV_TPR_WRITE, &cur_vcpu->stats); + break; + + /* + * The following MSRs are synthetic MSRs supported in the Novell Shim. + */ + case HV_MSR_SCONTROL: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_write_error; + } + cur_vcpu->control_msr = msr_content; + break; + case HV_MSR_SVERSION: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_write_error; + } + /* + * This is a read-only MSR; generate #GP + */ + hv_inject_exception(TRAP_gp_fault); + break; + case HV_MSR_SIEFP: + case HV_MSR_SIMP: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_write_error; + } + hv_write_sx_msr(idx, curp, cur_vcpu, msr_content); + break; + case HV_MSR_SINT0: + syn_int = 0; + goto syn_int_wr_process; + case HV_MSR_SINT1: + syn_int = 1; + goto syn_int_wr_process; + case HV_MSR_SINT2: + syn_int = 2; + goto syn_int_wr_process; + case HV_MSR_SINT3: + syn_int = 3; + goto syn_int_wr_process; + case HV_MSR_SINT4: + syn_int = 4; + goto syn_int_wr_process; + case HV_MSR_SINT5: + syn_int = 5; + goto syn_int_wr_process; + case HV_MSR_SINT6: + syn_int = 6; + goto syn_int_wr_process; + case 
HV_MSR_SINT7: + syn_int = 7; + goto syn_int_wr_process; + case HV_MSR_SINT8: + syn_int = 8; + goto syn_int_wr_process; + case HV_MSR_SINT9: + syn_int = 9; + goto syn_int_wr_process; + case HV_MSR_SINT10: + syn_int = 10; + goto syn_int_wr_process; + case HV_MSR_SINT11: + syn_int = 11; + goto syn_int_wr_process; + case HV_MSR_SINT12: + syn_int = 12; + goto syn_int_wr_process; + case HV_MSR_SINT13: + syn_int = 13; + goto syn_int_wr_process; + case HV_MSR_SINT14: + syn_int = 14; + goto syn_int_wr_process; + case HV_MSR_SINT15: + syn_int = 15; +syn_int_wr_process: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_write_error; + } + /* + * XXXKYS: We assume that the syn_int registers will be + * first written before the interrupt generation can occur. + * Specifically if SINT is masked all interrupts that may have + * been generated will be lost. Also when SINT is disabled; + * its effects will be only felt for subsequent interrupts that + * may be posted. XXXKYS: CHECK + */ + cur_vcpu->int_msr[syn_int] = msr_content; + break; + + case HV_MSR_SEOM: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_MSRS)) { + goto msr_write_error; + } + cur_vcpu->eom_msr = msr_content; + hv_process_message_q(curp, cur_vcpu); + break; + case HV_MSR_TIME_REF_COUNT: + /* + * This is a read-only msr. + */ + goto msr_write_error; + + /* + * Synthetic timer MSRs. + */ + case HV_MSR_TIMER0_CONFIG: + timer = 0; + goto process_timer_config; + case HV_MSR_TIMER1_CONFIG: + timer = 1; + goto process_timer_config; + case HV_MSR_TIMER2_CONFIG: + timer = 2; + goto process_timer_config; + case HV_MSR_TIMER3_CONFIG: + timer = 3; +process_timer_config: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_TIMERS)) { + goto msr_write_error; + } + /* + * Assume that the client is going to write the whole msr. + */ + if (!(msr_content & 0x9)) { + /* + * We are neither setting Auto Enable or Enable; + * silently exit. 
+ * Should this be considered to turn off a + * timer that may be currently + * active; XXXKYS: Check. For now we are + * not doing anything here. + */ + break; + } + if (!(((u32)(msr_content >> 16)) & 0x0000000f)) { + /* + * sintx is 0; clear the enable bit(s). + */ + msr_content &= ~(0x1); + } + cur_vcpu->timers[timer].config = msr_content; + /* + * XXXKYS: Can any order be assumed here; + * should we just act on whatever is in the + * count register. For now act as if the count + * register is valid and act on it. + */ + if (msr_content & 0x1) { + hv_schedule_time_out(&cur_vcpu->timers[timer]); + HV_STATS_COLLECT(HV_TIMEOUTS, &cur_vcpu->stats); + } + break; + case HV_MSR_TIMER0_COUNT: + timer = 0; + goto process_timer_count; + case HV_MSR_TIMER1_COUNT: + timer = 1; + goto process_timer_count; + case HV_MSR_TIMER2_COUNT: + timer = 2; + goto process_timer_count; + case HV_MSR_TIMER3_COUNT: + timer = 3; +process_timer_count: + if (!hv_privilege_check(curp, HV_ACCESS_SYNC_TIMERS)) { + goto msr_write_error; + } + cur_vcpu->timers[timer].count = + (msr_content + ((NOW())/100)); + if ((cur_vcpu->timers[timer].config & 0x9)) { + hv_schedule_time_out(&cur_vcpu->timers[timer]); + HV_STATS_COLLECT(HV_TIMEOUTS, &cur_vcpu->stats); + } + + break; + case HV_MSR_PVDRV_HCALL: + /* + * Establish the hypercall page for PV drivers. + */ + wrmsr_hypervisor_regs(0x40000000, regs->eax, regs->edx); + break; + case HV_MSR_SYSTEM_RESET: + /* + * Shutdown the domain/partition. + */ + if (msr_content & 0x1) { + domain_shutdown(d, SHUTDOWN_reboot); + } + break; + + default: + /* + * We did not handle the MSR address; + * let the caller deal with this. + */ + return (0); + } + return (1); +msr_write_error: + /* + * Have to inject #GP fault.
+ */ + hv_inject_exception(TRAP_gp_fault); + return (1); +} Index: xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_shim.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ xen-unstable.hg/xen/arch/x86/hvm/hyperv/hv_shim.h 2008-03-26 14:29:34.000000000 -0400 @@ -0,0 +1,326 @@ +/**************************************************************************** + | + | Copyright (c) [2007, 2008] Novell, Inc. + | All Rights Reserved. + | + | This program is free software; you can redistribute it and/or + | modify it under the terms of version 2 of the GNU General Public License as + | published by the Free Software Foundation. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, contact Novell, Inc. + | + | To contact Novell about this file by physical or electronic mail, + | you may find current contact information at www.novell.com + | + |*************************************************************************** +*/ + +/* + * Hyperv Shim Implementation. + * + * Engineering Contact: K. Y. 
Srinivasan + */ + +#ifndef HV_SHIM_H +#define HV_SHIM_H + +#include +#include +#include +#include +#include +#include +#include + +#include "hv_hypercall.h" + +/* + * Synthetic MSR addresses + */ +#define HV_MSR_GUEST_OS_ID 0x40000000 +#define HV_MSR_HYPERCALL 0x40000001 +#define HV_MSR_VP_INDEX 0x40000002 +#define HV_MSR_SYSTEM_RESET 0x40000003 +#define HV_MSR_TIME_REF_COUNT 0x40000020 +#define HV_MSR_EOI 0x40000070 +#define HV_MSR_ICR 0x40000071 +#define HV_MSR_TPR 0x40000072 + +#define HV_MSR_SCONTROL 0x40000080 +#define HV_MSR_SVERSION 0x40000081 +#define HV_MSR_SIEFP 0x40000082 +#define HV_MSR_SIMP 0x40000083 +#define HV_MSR_SEOM 0x40000084 +#define HV_MSR_SINT0 0x40000090 +#define HV_MSR_SINT1 0x40000091 +#define HV_MSR_SINT2 0x40000092 +#define HV_MSR_SINT3 0x40000093 +#define HV_MSR_SINT4 0x40000094 +#define HV_MSR_SINT5 0x40000095 +#define HV_MSR_SINT6 0x40000096 +#define HV_MSR_SINT7 0x40000097 +#define HV_MSR_SINT8 0x40000098 +#define HV_MSR_SINT9 0x40000099 +#define HV_MSR_SINT10 0x4000009A +#define HV_MSR_SINT11 0x4000009B +#define HV_MSR_SINT12 0x4000009C +#define HV_MSR_SINT13 0x4000009D +#define HV_MSR_SINT14 0x4000009E +#define HV_MSR_SINT15 0x4000009F + +#define HV_MSR_TIMER0_CONFIG 0x400000B0 +#define HV_MSR_TIMER0_COUNT 0x400000B1 +#define HV_MSR_TIMER1_CONFIG 0x400000B2 +#define HV_MSR_TIMER1_COUNT 0x400000B3 +#define HV_MSR_TIMER2_CONFIG 0x400000B4 +#define HV_MSR_TIMER2_COUNT 0x400000B5 +#define HV_MSR_TIMER3_CONFIG 0x400000B6 +#define HV_MSR_TIMER3_COUNT 0x400000B7 + +/* + * MSR for supporting PV drivers on longhorn. + */ +#define HV_MSR_PVDRV_HCALL 0x40001000 + +/* + * Hyperv Shim VCPU flags. + */ +#define HV_VCPU_BOOT_CPU 0x00000001 +#define HV_VCPU_UP 0x00000002 + +/* + * Hyperv shim flush flags. + */ + +#define HV_FLUSH_TLB 0X01 +#define HV_FLUSH_INVLPG 0X02 + +/* + * We use the following global state to manage TLB flush requests from the + * guest. 
At most only one flush can be active in the guest; we may have to + * revisit this if this is a bottleneck. + */ +typedef struct hv_flush_state { + int count; //0 unused; else #cpus participating + cpumask_t waiters; //Cpus waiting for the flush block + struct vcpu *owner; + u64 ret_val; + flush_va_t *flush_param; + unsigned short rep_count; +} hv_flush_state_t; + +/* + * hyperv shim message structure. + */ +typedef enum { + /* + * For now we only support timer messages + */ + TYPE_NONE = 0x00000000, + TYPE_TIMER_EXPIRED = 0x80000010 +} hv_message_type; + +typedef struct hv_timer_message { + hv_message_type type; + u8 pad1[3]; + u8 size; + u32 timer_index; + u32 pad2; + u64 expiration_time; +} hv_timer_message_t; + +typedef struct hv_message { + hv_message_type type; + uint8_t size; + uint8_t flags; + uint8_t reserved[2]; + uint32_t reserved1; + uint64_t pay_load[30]; +} hv_message_t; + + +typedef struct hv_vcpu_timer_state { + u64 config; + u64 count; /*expiration time in 100ns units*/ + int timer_index; + struct hv_vcpu *this_cpu; + struct timer vcpu_timer; +} hv_vcpu_timer_state_t; + +/* + * Stats structure. + */ + +typedef struct { + u64 num_switches; + u64 num_flushes; + u64 num_flushes_posted; + u64 num_flush_ranges; + u64 num_flush_ranges_posted; + + u64 num_tpr_reads; + u64 num_icr_reads; + u64 num_eoi_writes; + u64 num_tpr_writes; + u64 num_icr_writes; + + u64 num_gfs_acquires; + u64 num_gfs_releases; + u64 num_tlb_flushes; + u64 num_invl_pages; + u64 num_time_outs; +} hv_vcpu_stats_t; + +typedef struct hv_vcpu { + /* + * Per-vcpu state to support the hyperv shim; + */ + unsigned long flags; + /* + * Synthetic msrs. + */ + u64 control_msr; + u64 version_msr; + u64 siefp_msr; + u64 simp_msr; + u64 eom_msr; + + u64 int_msr[16]; + /* + * Timer MSRs. + */ + hv_vcpu_timer_state_t timers[4]; + void *sief_page; + void *sim_page; + /* + * Hypercall input/output processing. + * We keep these pages mapped in the hypervisor space. 
+ */ + void *input_buffer; /*input buffer virt address*/ + struct page_info *input_buffer_page; /*input buffer struct page */ + void *output_buffer; /*output buffer virt address*/ + struct page_info *output_buffer_page; /*output buffer struct page */ + struct vcpu *xen_vcpu; /*corresponding xen vcpu*/ + hv_vcpu_stats_t stats; +} hv_vcpu_t; + +/* + * Events of interest for gathering stats. + */ +#define HV_CSWITCH 1 +#define HV_FLUSH_VA_STAT 2 +#define HV_FLUSH_RANGE 3 +#define HV_FLUSH_VA_POSTED 4 +#define HV_FLUSH_RANGE_POSTED 5 +#define HV_TPR_READ 6 +#define HV_ICR_READ 7 +#define HV_TPR_WRITE 8 +#define HV_ICR_WRITE 9 +#define HV_EOI_WRITE 10 + +#define HV_GFS_ACQUIRE 11 +#define HV_GFS_RELEASE 12 +#define HV_TLB_FLUSH 13 +#define HV_INVL_PG 14 +#define HV_TIMEOUTS 15 + +void hv_collect_stats(int event, hv_vcpu_stats_t *ststp); + +#define HV_STATS //KYS: Temporary + +#ifdef HV_STATS +#define HV_STATS_COLLECT(event, statp) hv_collect_stats(event, statp) +#else +#define HV_STATS_COLLECT(event, statp) +#endif + +typedef struct hv_partition { + /* + * State maintained on a per guest basis to implement + * the Hyperv shim. + */ + spinlock_t lock; + atomic_t vcpus_active; + u64 guest_id_msr; + u64 hypercall_msr; + u64 privileges; + u64 supported_features; + unsigned long hypercall_mfn; + int long_mode_guest; + /* + * Each VCPU here corresponds to the vcpu in the underlying hypervisor; + * they share the same ID. + */ + hv_vcpu_t vcpu_state[MAX_VIRT_CPUS]; + hv_flush_state_t flush_state; +} hv_partition_t; + + +/* + * Privilege flags.
+ */ + +#define HV_ACCESS_VP_RUNTIME (1ULL << 0) +#define HV_ACCESS_TIME_REF_CNT (1ULL << 1) +#define HV_ACCESS_SYNC_MSRS (1ULL << 2) +#define HV_ACCESS_SYNC_TIMERS (1ULL << 3) +#define HV_ACCESS_APIC_MSRS (1ULL << 4) +#define HV_ACCESS_PARTITION_ID (1ULL << 33) + +#define hv_get_current_partition() \ +((current)->domain->arch.hvm_domain.hyperv_handle) + +#define hv_get_current_vcpu_index() (current)->vcpu_id + + +static inline int +hv_invalid_cpu_state(void) +{ + int state; + state = hvm_funcs.guest_x86_mode(current); + if ((state == 4) || (state == 8)) { + return (0); + } + return (1); +} + +static inline u64 +hv_build_hcall_retval(int code, int reps) +{ + u64 ret_val=0; + ret_val |= (code & 0xff); + ret_val |= (((long long)(reps & 0xfff)) << 32); + return (ret_val); +} + +static inline void hv_set_syscall_retval(struct cpu_user_regs *pregs, + int long_mode, u64 ret_val) +{ + if (long_mode) { + pregs->eax = ret_val; + } else { + pregs->edx = (u32)(ret_val >> 32); + pregs->eax = (u32)(ret_val); + } +} + +static inline int +hv_privilege_check(hv_partition_t *curp, u64 flags) +{ + return ((curp->privileges & flags)? 1: 0); +} + +void +hv_handle_hypercall(u64 opcode, u64 input, u64 output, + u64 *ret_val); + + +void hv_print_stats(hv_partition_t *curp, int i); + +#endif /*HV_SHIM_H */