diff -r bc573e4f7319 xen/arch/x86/domain.c --- a/xen/arch/x86/domain.c Tue Jan 23 18:03:02 2007 -0600 +++ b/xen/arch/x86/domain.c Tue Jan 23 18:56:16 2007 -0600 @@ -331,6 +331,8 @@ int vcpu_initialise(struct vcpu *v) pae_l3_cache_init(&v->arch.pae_l3_cache); + /* paging vcpu resource initialization */ + paging_vcpu_init(v); if ( is_hvm_domain(d) ) { @@ -424,7 +426,7 @@ int arch_domain_create(struct domain *d) HYPERVISOR_COMPAT_VIRT_START(d) = __HYPERVISOR_COMPAT_VIRT_START; #endif - shadow_domain_init(d); + paging_domain_init(d); if ( !is_idle_domain(d) ) { @@ -464,7 +466,7 @@ void arch_domain_destroy(struct domain * hvm_domain_destroy(d); } - shadow_final_teardown(d); + paging_final_teardown(d); free_xenheap_pages( d->arch.mm_perdomain_pt, @@ -653,7 +655,7 @@ int arch_set_info_guest( set_bit(_VCPUF_initialised, &v->vcpu_flags); if ( shadow_mode_enabled(d) ) - shadow_update_paging_modes(v); + paging_update_paging_modes(v); update_cr3(v); @@ -1459,7 +1461,7 @@ void domain_relinquish_resources(struct vcpu_destroy_pagetables(v); /* Tear down shadow mode stuff. */ - shadow_teardown(d); + paging_teardown(d); /* * Relinquish GDT mappings. No need for explicit unmapping of the LDT as @@ -1481,7 +1483,7 @@ void arch_dump_domain_info(struct domain if ( shadow_mode_enabled(d) ) { printk(" shadow mode: "); - if ( d->arch.shadow.mode & SHM2_enable ) + if ( d->arch.paging.shadow.mode & SHM2_enable ) printk("enabled "); if ( shadow_mode_refcounts(d) ) printk("refcounts "); @@ -1499,11 +1501,11 @@ void arch_dump_vcpu_info(struct vcpu *v) { if ( shadow_mode_enabled(v->domain) ) { - if ( v->arch.shadow.mode ) + if ( v->arch.paging.mode ) printk(" shadowed %u-on-%u, %stranslated\n", - v->arch.shadow.mode->guest_levels, - v->arch.shadow.mode->shadow_levels, - shadow_vcpu_mode_translate(v) ? "" : "not "); + v->arch.paging.mode->guest_levels, + v->arch.paging.mode->shadow.shadow_levels, + paging_vcpu_mode_translate(v) ? 
"" : "not "); else printk(" not shadowed\n"); } diff -r bc573e4f7319 xen/arch/x86/domain_build.c --- a/xen/arch/x86/domain_build.c Tue Jan 23 18:03:02 2007 -0600 +++ b/xen/arch/x86/domain_build.c Tue Jan 23 18:56:16 2007 -0600 @@ -823,7 +823,7 @@ int construct_dom0(struct domain *d, /* Set up CR3 value for write_ptbase */ if ( shadow_mode_enabled(v->domain) ) - shadow_update_paging_modes(v); + paging_update_paging_modes(v); else update_cr3(v); @@ -973,7 +973,7 @@ int construct_dom0(struct domain *d, if ( opt_dom0_shadow ) if ( shadow_enable(d, SHM2_enable) == 0 ) - shadow_update_paging_modes(v); + paging_update_paging_modes(v); if ( supervisor_mode_kernel ) { diff -r bc573e4f7319 xen/arch/x86/domctl.c --- a/xen/arch/x86/domctl.c Tue Jan 23 18:03:02 2007 -0600 +++ b/xen/arch/x86/domctl.c Tue Jan 23 18:54:33 2007 -0600 @@ -50,7 +50,7 @@ _long arch_do_domctl( d = find_domain_by_id(domctl->domain); if ( d != NULL ) { - ret = shadow_domctl(d, + ret = paging_domctl(d, &domctl->u.shadow_op, guest_handle_cast(u_domctl, void)); put_domain(d); diff -r bc573e4f7319 xen/arch/x86/traps.c --- a/xen/arch/x86/traps.c Tue Jan 23 18:03:02 2007 -0600 +++ b/xen/arch/x86/traps.c Tue Jan 23 18:49:46 2007 -0600 @@ -856,7 +856,7 @@ static int fixup_page_fault(unsigned lon if ( unlikely(IN_HYPERVISOR_RANGE(addr)) ) { if ( shadow_mode_external(d) && guest_mode(regs) ) - return shadow_fault(addr, regs); + return paging_fault(addr, regs); if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) ) return handle_gdt_ldt_mapping_fault( addr - GDT_LDT_VIRT_START, regs); @@ -872,7 +872,7 @@ static int fixup_page_fault(unsigned lon return EXCRET_fault_fixed; if ( shadow_mode_enabled(d) ) - return shadow_fault(addr, regs); + return paging_fault(addr, regs); return 0; } diff -r bc573e4f7319 xen/arch/x86/x86_64/domctl.c --- a/xen/arch/x86/x86_64/domctl.c Tue Jan 23 18:03:02 2007 -0600 +++ b/xen/arch/x86/x86_64/domctl.c Tue Jan 23 18:54:33 2007 -0600 @@ -12,7 +12,7 @@ 
DEFINE_XEN_GUEST_HANDLE(compat_domctl_t) #define xen_domctl_t compat_domctl_t #define arch_do_domctl(x, h) arch_compat_domctl(x, _##h) -static int compat_shadow_domctl(struct domain *d, +static int compat_paging_domctl(struct domain *d, compat_domctl_shadow_op_t *csc, XEN_GUEST_HANDLE(void) u_domctl) { @@ -53,7 +53,7 @@ static int compat_shadow_domctl(struct d } while (0) XLAT_domctl_shadow_op(&nsc, csc); #undef XLAT_domctl_shadow_op_HNDL_dirty_bitmap - rc = shadow_domctl(d, &nsc, u_domctl); + rc = paging_domctl(d, &nsc, u_domctl); if ( rc != __HYPERVISOR_domctl ) { BUG_ON(rc > 0); @@ -74,7 +74,7 @@ static int compat_shadow_domctl(struct d } #define xen_domctl_shadow_op compat_domctl_shadow_op #define xen_domctl_shadow_op_t compat_domctl_shadow_op_t -#define shadow_domctl(d, sc, u) compat_shadow_domctl(d, sc, u) +#define paging_domctl(d, sc, u) compat_paging_domctl(d, sc, u) #define xen_domctl_ioport_permission compat_domctl_ioport_permission #define xen_domctl_ioport_permission_t compat_domctl_ioport_permission_t diff -r bc573e4f7319 xen/include/asm-x86/domain.h --- a/xen/include/asm-x86/domain.h Tue Jan 23 18:03:02 2007 -0600 +++ b/xen/include/asm-x86/domain.h Tue Jan 23 18:54:33 2007 -0600 @@ -58,31 +58,67 @@ extern void toggle_guest_mode(struct vcp */ extern void hypercall_page_initialise(struct domain *d, void *); +/************************************************/ +/* shadow paging extension */ +/************************************************/ struct shadow_domain { u32 mode; /* flags to control shadow operation */ spinlock_t lock; /* shadow domain lock */ int locker; /* processor which holds the lock */ const char *locker_function; /* Func that took it */ + struct list_head pinned_shadows; + unsigned int opt_flags; /* runtime tunable optimizations on/off */ + + /* Shadow hashtable */ + struct shadow_page_info **hash_table; + int hash_walking; /* Some function is walking the hash table */ + + /* Shadow log-dirty bitmap */ + unsigned long *dirty_bitmap; + 
unsigned int dirty_bitmap_size; /* in pages, bit per page */ + + /* Shadow log-dirty mode stats */ + unsigned int fault_count; + unsigned int dirty_count; +}; + +struct shadow_vcpu { +#if CONFIG_PAGING_LEVELS >= 3 + /* PAE guests: per-vcpu shadow top-level table */ + l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); +#endif + /* Last MFN that we emulated a write to. */ + unsigned long last_emulated_mfn; + /* MFN of the last shadow that we shot a writeable mapping in */ + unsigned long last_writeable_pte_smfn; +}; +/************************************************/ +/* common paging data structure */ +/************************************************/ +struct paging_domain { struct list_head freelists[SHADOW_MAX_ORDER + 1]; struct list_head p2m_freelist; struct list_head p2m_inuse; - struct list_head pinned_shadows; unsigned int total_pages; /* number of pages allocated */ unsigned int free_pages; /* number of pages on freelists */ unsigned int p2m_pages; /* number of pages in p2m map */ - unsigned int opt_flags; /* runtime tunable optimizations on/off */ - - /* Shadow hashtable */ - struct shadow_page_info **hash_table; - int hash_walking; /* Some function is walking the hash table */ - - /* Shadow log-dirty bitmap */ - unsigned long *dirty_bitmap; - unsigned int dirty_bitmap_size; /* in pages, bit per page */ - - /* Shadow log-dirty mode stats */ - unsigned int fault_count; - unsigned int dirty_count; + + int (*domctl )(struct domain *d, xen_domctl_shadow_op_t *sc, + XEN_GUEST_HANDLE(void) u_domctl); + void (*final_teardown )(struct domain *d); + void (*teardown )(struct domain *d); + + /* extension for shadow paging support */ + struct shadow_domain shadow; +}; + +struct paging_vcpu { + struct paging_mode *mode; + /* HVM guest: paging enabled (CR0.PG)? 
*/ + unsigned int translate_enabled:1; + + /* extension for shadow paging support */ + struct shadow_vcpu shadow; }; struct arch_domain @@ -108,7 +144,7 @@ struct arch_domain struct hvm_domain hvm_domain; - struct shadow_domain shadow; + struct paging_domain paging; /* Shadow translated domain: P2M mapping */ pagetable_t phys_table; @@ -139,21 +175,6 @@ struct pae_l3_cache { }; #define pae_l3_cache_init(c) ((void)0) #endif -struct shadow_vcpu { -#if CONFIG_PAGING_LEVELS >= 3 - /* PAE guests: per-vcpu shadow top-level table */ - l3_pgentry_t l3table[4] __attribute__((__aligned__(32))); -#endif - /* Pointers to mode-specific entry points. */ - struct shadow_paging_mode *mode; - /* Last MFN that we emulated a write to. */ - unsigned long last_emulated_mfn; - /* MFN of the last shadow that we shot a writeable mapping in */ - unsigned long last_writeable_pte_smfn; - /* HVM guest: paging enabled (CR0.PG)? */ - unsigned int translate_enabled:1; -}; - struct arch_vcpu { /* Needs 16-byte aligment for FXSAVE/FXRSTOR. */ @@ -205,7 +226,7 @@ struct arch_vcpu /* Current LDT details. 
*/ unsigned long shadow_ldt_mapcnt; - struct shadow_vcpu shadow; + struct paging_vcpu paging; } __cacheline_aligned; /* shorthands to improve code legibility */ diff -r bc573e4f7319 xen/include/asm-x86/shadow.h --- a/xen/include/asm-x86/shadow.h Tue Jan 23 18:03:02 2007 -0600 +++ b/xen/include/asm-x86/shadow.h Tue Jan 23 18:56:16 2007 -0600 @@ -28,6 +28,7 @@ #include #include #include +#include /***************************************************************************** * Macros to tell which shadow paging mode a domain is in */ @@ -45,11 +46,15 @@ * requires VT or similar mechanisms */ #define SHM2_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << SHM2_shift) -#define shadow_mode_enabled(_d) ((_d)->arch.shadow.mode) -#define shadow_mode_refcounts(_d) ((_d)->arch.shadow.mode & SHM2_refcounts) -#define shadow_mode_log_dirty(_d) ((_d)->arch.shadow.mode & SHM2_log_dirty) -#define shadow_mode_translate(_d) ((_d)->arch.shadow.mode & SHM2_translate) -#define shadow_mode_external(_d) ((_d)->arch.shadow.mode & SHM2_external) +#define shadow_mode_enabled(_d) ((_d)->arch.paging.shadow.mode) +#define shadow_mode_refcounts(_d) ((_d)->arch.paging.shadow.mode & \ + SHM2_refcounts) +#define shadow_mode_log_dirty(_d) ((_d)->arch.paging.shadow.mode & \ + SHM2_log_dirty) +#define shadow_mode_translate(_d) ((_d)->arch.paging.shadow.mode & \ + SHM2_translate) +#define shadow_mode_external(_d) ((_d)->arch.paging.shadow.mode & \ + SHM2_external) /* Xen traps & emulates all reads of all page table pages: * not yet supported */ @@ -58,14 +63,6 @@ /****************************************************************************** * The equivalent for a particular vcpu of a shadowed domain. */ - -/* Is this vcpu using the P2M table to translate between GFNs and MFNs? - * - * This is true of translated HVM domains on a vcpu which has paging - * enabled. (HVM vcpus with paging disabled are using the p2m table as - * its paging table, so no translation occurs in this case.) 
- * It is also true for all vcpus of translated PV domains. */ -#define shadow_vcpu_mode_translate(_v) ((_v)->arch.shadow.translate_enabled) /* * 32on64 support @@ -76,58 +73,6 @@ #define pv_32bit_guest(_v) (!is_hvm_vcpu(_v)) #endif -/****************************************************************************** - * With shadow pagetables, the different kinds of address start - * to get get confusing. - * - * Virtual addresses are what they usually are: the addresses that are used - * to accessing memory while the guest is running. The MMU translates from - * virtual addresses to machine addresses. - * - * (Pseudo-)physical addresses are the abstraction of physical memory the - * guest uses for allocation and so forth. For the purposes of this code, - * we can largely ignore them. - * - * Guest frame numbers (gfns) are the entries that the guest puts in its - * pagetables. For normal paravirtual guests, they are actual frame numbers, - * with the translation done by the guest. - * - * Machine frame numbers (mfns) are the entries that the hypervisor puts - * in the shadow page tables. - * - * Elsewhere in the xen code base, the name "gmfn" is generally used to refer - * to a "machine frame number, from the guest's perspective", or in other - * words, pseudo-physical frame numbers. However, in the shadow code, the - * term "gmfn" means "the mfn of a guest page"; this combines naturally with - * other terms such as "smfn" (the mfn of a shadow page), gl2mfn (the mfn of a - * guest L2 page), etc... - */ - -/* With this defined, we do some ugly things to force the compiler to - * give us type safety between mfns and gfns and other integers. - * TYPE_SAFE(int foo) defines a foo_t, and _foo() and foo_x() functions - * that translate beween int and foo_t. - * - * It does have some performance cost because the types now have - * a different storage attribute, so may not want it on all the time. 
*/ -#ifndef NDEBUG -#define TYPE_SAFETY 1 -#endif - -#ifdef TYPE_SAFETY -#define TYPE_SAFE(_type,_name) \ -typedef struct { _type _name; } _name##_t; \ -static inline _name##_t _##_name(_type n) { return (_name##_t) { n }; } \ -static inline _type _name##_x(_name##_t n) { return n._name; } -#else -#define TYPE_SAFE(_type,_name) \ -typedef _type _name##_t; \ -static inline _name##_t _##_name(_type n) { return n; } \ -static inline _type _name##_x(_name##_t n) { return n; } -#endif - -TYPE_SAFE(unsigned long,mfn) - /* Macro for printk formats: use as printk("%"SH_PRI_mfn"\n", mfn_x(foo)); */ #define SH_PRI_mfn "05lx" @@ -138,59 +83,16 @@ TYPE_SAFE(unsigned long,mfn) * These shouldn't be used directly by callers; rather use the functions * below which will indirect through this table as appropriate. */ -struct sh_emulate_ctxt; -struct shadow_paging_mode { - int (*page_fault )(struct vcpu *v, unsigned long va, - struct cpu_user_regs *regs); - int (*invlpg )(struct vcpu *v, unsigned long va); - paddr_t (*gva_to_gpa )(struct vcpu *v, unsigned long va); - unsigned long (*gva_to_gfn )(struct vcpu *v, unsigned long va); - void (*update_cr3 )(struct vcpu *v, int do_locking); - int (*map_and_validate_gl1e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl2e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl2he)(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl3e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - int (*map_and_validate_gl4e )(struct vcpu *v, mfn_t gmfn, - void *new_guest_entry, u32 size); - void (*detach_old_tables )(struct vcpu *v); - int (*x86_emulate_write )(struct vcpu *v, unsigned long va, - void *src, u32 bytes, - struct sh_emulate_ctxt *sh_ctxt); - int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va, - unsigned long old, - unsigned long new, - unsigned int bytes, - struct sh_emulate_ctxt *sh_ctxt); - 
int (*x86_emulate_cmpxchg8b )(struct vcpu *v, unsigned long va, - unsigned long old_lo, - unsigned long old_hi, - unsigned long new_lo, - unsigned long new_hi, - struct sh_emulate_ctxt *sh_ctxt); - mfn_t (*make_monitor_table )(struct vcpu *v); - void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn); - void * (*guest_map_l1e )(struct vcpu *v, unsigned long va, - unsigned long *gl1mfn); - void (*guest_get_eff_l1e )(struct vcpu *v, unsigned long va, - void *eff_l1e); - int (*guess_wrmap )(struct vcpu *v, - unsigned long vaddr, mfn_t gmfn); - /* For outsiders to tell what mode we're in */ - unsigned int shadow_levels; - unsigned int guest_levels; -}; - - /***************************************************************************** * Entry points into the shadow code */ /* Set up the shadow-specific parts of a domain struct at start of day. * Called for every domain from arch_domain_create() */ void shadow_domain_init(struct domain *d); + +/* Set up the shadow-specific parts of a vcpu struct. It is called by + * paging_vcpu_init() in paging.c */ +void shadow_vcpu_init(struct vcpu *v); /* Enable an arbitrary shadow mode. Call once at domain creation. */ int shadow_enable(struct domain *d, u32 mode); @@ -218,52 +120,6 @@ static inline void mark_dirty(struct dom shadow_mark_dirty(d, _mfn(gmfn)); } -/* Handle page-faults caused by the shadow pagetable mechanisms. - * Called from pagefault handler in Xen, and from the HVM trap handlers - * for pagefaults. Returns 1 if this fault was an artefact of the - * shadow code (and the guest should retry) or 0 if it is not (and the - * fault should be handled elsewhere or passed to the guest). */ -static inline int shadow_fault(unsigned long va, struct cpu_user_regs *regs) -{ - struct vcpu *v = current; - perfc_incrc(shadow_fault); - return v->arch.shadow.mode->page_fault(v, va, regs); -} - -/* Handle invlpg requests on shadowed vcpus. 
- * Returns 1 if the invlpg instruction should be issued on the hardware, - * or 0 if it's safe not to do so. */ -static inline int shadow_invlpg(struct vcpu *v, unsigned long va) -{ - return v->arch.shadow.mode->invlpg(v, va); -} - -/* Translate a guest virtual address to the physical address that the - * *guest* pagetables would map it to. */ -static inline paddr_t shadow_gva_to_gpa(struct vcpu *v, unsigned long va) -{ - if ( unlikely(!shadow_vcpu_mode_translate(v)) ) - return (paddr_t) va; - return v->arch.shadow.mode->gva_to_gpa(v, va); -} - -/* Translate a guest virtual address to the frame number that the - * *guest* pagetables would map it to. */ -static inline unsigned long shadow_gva_to_gfn(struct vcpu *v, unsigned long va) -{ - if ( unlikely(!shadow_vcpu_mode_translate(v)) ) - return va >> PAGE_SHIFT; - return v->arch.shadow.mode->gva_to_gfn(v, va); -} - -/* Update all the things that are derived from the guest's CR3. - * Called when the guest changes CR3; the caller can then use v->arch.cr3 - * as the value to load into the host CR3 to schedule this vcpu */ -static inline void shadow_update_cr3(struct vcpu *v) -{ - v->arch.shadow.mode->update_cr3(v, 1); -} - /* Update all the things that are derived from the guest's CR0/CR3/CR4. * Called to initialize paging structures if the paging mode * has changed, and when bringing up a VCPU for the first time. 
*/ @@ -280,7 +136,7 @@ guest_map_l1e(struct vcpu *v, unsigned l l2_pgentry_t l2e; if ( unlikely(shadow_mode_translate(v->domain)) ) - return v->arch.shadow.mode->guest_map_l1e(v, addr, gl1mfn); + return v->arch.paging.mode->shadow.guest_map_l1e(v, addr, gl1mfn); /* Find this l1e and its enclosing l1mfn in the linear map */ if ( __copy_from_user(&l2e, @@ -317,7 +173,7 @@ guest_get_eff_l1e(struct vcpu *v, unsign return; } - v->arch.shadow.mode->guest_get_eff_l1e(v, addr, eff_l1e); + v->arch.paging.mode->shadow.guest_get_eff_l1e(v, addr, eff_l1e); } /* Read the guest's l1e that maps this address, from the kernel-mode