diff -r 8ee92b9f890f xen/arch/x86/hvm/emulate.c
--- a/xen/arch/x86/hvm/emulate.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/hvm/emulate.c	Mon Jul 13 12:00:26 2009 +0100
@@ -411,6 +411,9 @@
     if ( rc == HVMCOPY_bad_gva_to_gfn )
         return X86EMUL_EXCEPTION;
 
+    if ( rc == HVMCOPY_unemulatable )
+        return X86EMUL_UNHANDLEABLE;
+
     if ( rc == HVMCOPY_bad_gfn_to_mfn )
     {
         if ( access_type == hvm_access_insn_fetch )
@@ -498,6 +501,8 @@
     rc = hvm_copy_to_guest_virt(addr, p_data, bytes, pfec);
     if ( rc == HVMCOPY_bad_gva_to_gfn )
         return X86EMUL_EXCEPTION;
+    if ( rc == HVMCOPY_unemulatable )
+        return X86EMUL_UNHANDLEABLE;
 
     if ( rc == HVMCOPY_bad_gfn_to_mfn )
     {
@@ -636,12 +641,12 @@
         return rc;
 
     (void)gfn_to_mfn_current(sgpa >> PAGE_SHIFT, &p2mt);
-    if ( !p2m_is_ram(p2mt) )
+    if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
         return hvmemul_do_mmio(
             sgpa, reps, bytes_per_rep, dgpa, IOREQ_READ, df, NULL);
 
     (void)gfn_to_mfn_current(dgpa >> PAGE_SHIFT, &p2mt);
-    if ( !p2m_is_ram(p2mt) )
+    if ( !p2m_is_ram(p2mt) && !p2m_is_grant(p2mt) )
         return hvmemul_do_mmio(
             dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL);
diff -r 8ee92b9f890f xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Mon Jul 13 12:00:26 2009 +0100
@@ -1434,6 +1434,7 @@
 
     if ( tr.limit < (sizeof(tss)-1) )
     {
+    bad_tss:
         hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
         goto out;
     }
@@ -1442,6 +1443,8 @@
         &tss, prev_tr.base, sizeof(tss), PFEC_page_present);
     if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
+    if ( rc != HVMCOPY_okay )
+        goto bad_tss;
 
     eflags = regs->eflags;
 
     if ( taskswitch_reason == TSW_iret )
@@ -1478,11 +1481,17 @@
         prev_tr.base, &tss, sizeof(tss), PFEC_page_present);
     if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
+    /* The copy back might still have failed, but that's the guest's
+       problem at this stage. */
 
     rc = hvm_copy_from_guest_virt(
         &tss, tr.base, sizeof(tss), PFEC_page_present);
     if ( rc == HVMCOPY_bad_gva_to_gfn )
         goto out;
+    if ( rc != HVMCOPY_okay ) {
+        /* The guest is probably dead now.  Oh well. */
+        goto bad_tss;
+    }
 
     if ( hvm_set_cr3(tss.cr3) )
         goto out;
@@ -1518,6 +1527,11 @@
             tr.base, &tss, sizeof(tss), PFEC_page_present);
         if ( rc == HVMCOPY_bad_gva_to_gfn )
             exn_raised = 1;
+        if ( rc != HVMCOPY_okay )
+        {
+            hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
+            exn_raised = 1;
+        }
 
     if ( (tss.trace & 1) && !exn_raised )
         hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
@@ -1589,6 +1603,8 @@
 
         mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+        if ( p2m_is_grant(p2mt) )
+            return HVMCOPY_unemulatable;
         if ( !p2m_is_ram(p2mt) )
             return HVMCOPY_bad_gfn_to_mfn;
         ASSERT(mfn_valid(mfn));
@@ -1997,7 +2013,8 @@
 static long hvm_grant_table_op(
     unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
 {
-    if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) )
+    if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) &&
+         (cmd != GNTTABOP_map_grant_ref) && (cmd != GNTTABOP_unmap_grant_ref) )
         return -ENOSYS; /* all other commands need auditing */
     return do_grant_table_op(cmd, uop, count);
 }
@@ -2804,17 +2821,35 @@
         if ( a.hvmmem_type >= ARRAY_SIZE(memtype) )
             goto param_fail4;
-        
-        rc = 0;
-        
+
         for ( pfn = a.first_pfn; pfn < a.first_pfn + a.nr; pfn++ )
         {
             p2m_type_t t;
+            p2m_type_t nt;
             mfn_t mfn;
             mfn = gfn_to_mfn(d, pfn, &t);
-            p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
+            if ( p2m_is_grant(t) )
+            {
+                gdprintk(XENLOG_WARNING,
+                         "type for pfn 0x%lx changed to grant while we were working?\n",
+                         pfn);
+                goto param_fail4;
+            }
+            else
+            {
+                nt = p2m_change_type(d, pfn, t, memtype[a.hvmmem_type]);
+                if ( nt != t )
+                {
+                    gdprintk(XENLOG_WARNING,
+                             "type of pfn 0x%lx changed from %d to %d while we were trying to change it to %d\n",
+                             pfn, t, nt, memtype[a.hvmmem_type]);
+                    goto param_fail4;
+                }
+            }
         }
-        
+
+        rc = 0;
+
     param_fail4:
         rcu_unlock_domain(d);
         break;
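With hvm_grant_table_op() now letting GNTTABOP_map_grant_ref and GNTTABOP_unmap_grant_ref through, a PV-on-HVM guest can map a remote grant straight into its own physmap: create_grant_p2m_mapping(), added in xen/arch/x86/mm.c below, interprets op->host_addr as a guest physical address. A minimal guest-side sketch of the map call, assuming the usual HYPERVISOR_grant_table_op() hypercall wrapper and a caller-chosen free gpfn (both are assumptions of this sketch, not part of the patch):

#include <xen/xen.h>
#include <xen/grant_table.h>   /* public grant-table interface */

/* Sketch: map <remote_dom, gref> at guest frame 'gpfn'.  Returns the
 * grant handle (needed later for unmap) or a negative GNTST_* error.
 * 'gpfn' must be an otherwise-unused frame in the guest's physmap. */
static int map_remote_grant(domid_t remote_dom, grant_ref_t gref,
                            unsigned long gpfn)
{
    struct gnttab_map_grant_ref op;

    op.host_addr = (uint64_t)gpfn << PAGE_SHIFT; /* a gfn, not a VA, for HVM */
    op.flags     = GNTMAP_host_map;  /* device/application maps are refused */
    op.ref       = gref;
    op.dom       = remote_dom;

    if ( HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1) != 0 )
        return -1;                   /* hypercall itself failed */
    return (op.status == GNTST_okay) ? op.handle : op.status;
}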
diff -r 8ee92b9f890f xen/arch/x86/hvm/stdvga.c
--- a/xen/arch/x86/hvm/stdvga.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/hvm/stdvga.c	Mon Jul 13 12:00:26 2009 +0100
@@ -478,8 +478,8 @@
         for ( i = 0; i < p->count; i++ )
         {
             tmp = stdvga_mem_read(addr, p->size);
-            if ( hvm_copy_to_guest_phys(data, &tmp, p->size) ==
-                 HVMCOPY_bad_gfn_to_mfn )
+            if ( hvm_copy_to_guest_phys(data, &tmp, p->size) !=
+                 HVMCOPY_okay )
             {
                 (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt);
                 /*
@@ -500,8 +500,8 @@
         uint32_t addr = p->addr, data = p->data, tmp;
         for ( i = 0; i < p->count; i++ )
         {
-            if ( hvm_copy_from_guest_phys(&tmp, data, p->size) ==
-                 HVMCOPY_bad_gfn_to_mfn )
+            if ( hvm_copy_from_guest_phys(&tmp, data, p->size) !=
+                 HVMCOPY_okay )
             {
                 (void)gfn_to_mfn_current(data >> PAGE_SHIFT, &p2mt);
                 if ( (p2mt != p2m_mmio_dm) || (data < VGA_MEM_BASE) ||
diff -r 8ee92b9f890f xen/arch/x86/hvm/svm/emulate.c
--- a/xen/arch/x86/hvm/svm/emulate.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/hvm/svm/emulate.c	Mon Jul 13 12:00:26 2009 +0100
@@ -126,13 +126,15 @@
         /* OK just to give up; we'll have injected #PF already */
         return 0;
     case HVMCOPY_bad_gfn_to_mfn:
-    default:
+    case HVMCOPY_unemulatable:
         /* Not OK: fetches from non-RAM pages are not supportable. */
         gdprintk(XENLOG_WARNING, "Bad instruction fetch at %#lx (%#lx)\n",
                  (unsigned long) guest_cpu_user_regs()->eip, addr);
         hvm_inject_exception(TRAP_gp_fault, 0, 0);
         return 0;
     }
+    BUG();
+    return 0;
 }
 
 int __get_instruction_length_from_list(struct vcpu *v,
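This switch and the analogous one in xen/arch/x86/mm/shadow/common.c further down now enumerate every hvm_copy_result explicitly and trap the impossible fall-through with BUG(). The shared pattern, restated as a standalone sketch (the function name is hypothetical; the mapping itself is the one both call sites use):

/* Hypothetical consolidation of the HVMCOPY -> X86EMUL mapping used at
 * the emulation call sites touched by this patch. */
static int x86emul_rc_of_hvmcopy(enum hvm_copy_result rc)
{
    switch ( rc )
    {
    case HVMCOPY_okay:
        return X86EMUL_OKAY;
    case HVMCOPY_bad_gva_to_gfn:
        return X86EMUL_EXCEPTION;    /* #PF has already been injected */
    case HVMCOPY_bad_gfn_to_mfn:     /* e.g. MMIO */
    case HVMCOPY_unemulatable:       /* grant-mapped frame: don't emulate */
        return X86EMUL_UNHANDLEABLE;
    }
    BUG();  /* unreachable while the switch covers the whole enum */
    return X86EMUL_UNHANDLEABLE;
}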
diff -r 8ee92b9f890f xen/arch/x86/hvm/svm/svm.c
--- a/xen/arch/x86/hvm/svm/svm.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c	Mon Jul 13 12:00:26 2009 +0100
@@ -938,8 +938,27 @@
     }
 
     /* Log-dirty: mark the page dirty and let the guest write it again */
-    paging_mark_dirty(current->domain, mfn_x(mfn));
-    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+    if ( p2mt == p2m_ram_logdirty )
+    {
+        paging_mark_dirty(current->domain, mfn_x(mfn));
+        p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
+        return;
+    }
+
+    /* Okay, this shouldn't happen.  Maybe the guest was writing to a
+       read-only grant mapping? */
+    if ( p2mt == p2m_grant_map_ro )
+    {
+        /* Naughty... */
+        gdprintk(XENLOG_WARNING,
+                 "trying to write to read-only grant mapping\n");
+        hvm_inject_exception(TRAP_gp_fault, 0, 0);
+        return;
+    }
+
+    /* Something bad has happened; either Xen or the hardware have
+       screwed up. */
+    gdprintk(XENLOG_WARNING, "unexpected SVM nested page fault\n");
 }
 
 static void svm_fpu_dirty_intercept(void)
diff -r 8ee92b9f890f xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/mm.c	Mon Jul 13 12:00:26 2009 +0100
@@ -2672,10 +2672,16 @@
             break;
         }
 
-        okay = 1;
         gmfn = op.arg1.mfn;
         mfn = gmfn_to_mfn(FOREIGNDOM, gmfn);
+        if ( mfn == INVALID_MFN )
+        {
+            MEM_LOG("Bad gmfn_to_mfn");
+            rc = -EFAULT;
+            break;
+        }
         page = mfn_to_page(mfn);
+        okay = 1;
 
         switch ( op.cmd )
         {
@@ -3475,11 +3481,37 @@
     return replace_grant_va_mapping(addr, frame, l1e_empty(), v);
 }
 
+static int create_grant_p2m_mapping(uint64_t addr, unsigned long frame,
+                                    unsigned int flags,
+                                    unsigned int cache_flags)
+{
+    p2m_type_t p2mt;
+    int rc;
+
+    if ( cache_flags || (flags & ~GNTMAP_readonly) != GNTMAP_host_map )
+        return GNTST_general_error;
+
+    if ( flags & GNTMAP_readonly )
+        p2mt = p2m_grant_map_ro;
+    else
+        p2mt = p2m_grant_map_rw;
+    rc = guest_physmap_add_entry(current->domain, addr >> PAGE_SHIFT,
+                                 frame, 0, p2mt);
+    if ( rc )
+        return GNTST_general_error;
+    else
+        return GNTST_okay;
+}
+
 int create_grant_host_mapping(uint64_t addr, unsigned long frame,
                               unsigned int flags, unsigned int cache_flags)
 {
-    l1_pgentry_t pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
-
+    l1_pgentry_t pte;
+
+    if ( paging_mode_external(current->domain) )
+        return create_grant_p2m_mapping(addr, frame, flags, cache_flags);
+
+    pte = l1e_from_pfn(frame, GRANT_PTE_FLAGS);
     if ( (flags & GNTMAP_application_map) )
         l1e_add_flags(pte,_PAGE_USER);
     if ( !(flags & GNTMAP_readonly) )
@@ -3496,6 +3528,29 @@
     return create_grant_va_mapping(addr, pte, current);
 }
 
+static int replace_grant_p2m_mapping(
+    uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int flags)
+{
+    unsigned long gfn = (unsigned long)(addr >> PAGE_SHIFT);
+    p2m_type_t type;
+    mfn_t old_mfn;
+
+    if ( new_addr != 0 || (flags & GNTMAP_contains_pte) )
+        return GNTST_general_error;
+
+    old_mfn = gfn_to_mfn_current(gfn, &type);
+    if ( !p2m_is_grant(type) || mfn_x(old_mfn) != frame )
+    {
+        gdprintk(XENLOG_WARNING,
+                 "replace_grant_p2m_mapping: old mapping invalid (type %d, mfn %lx, frame %lx)\n",
+                 type, mfn_x(old_mfn), frame);
+        return GNTST_general_error;
+    }
+    guest_physmap_remove_page(current->domain, gfn, frame, 0);
+
+    return GNTST_okay;
+}
+
 int replace_grant_host_mapping(
     uint64_t addr, unsigned long frame, uint64_t new_addr, unsigned int flags)
 {
@@ -3505,6 +3560,9 @@
     struct page_info *l1pg;
     int rc;
 
+    if ( paging_mode_external(current->domain) )
+        return replace_grant_p2m_mapping(addr, frame, new_addr, flags);
+
     if ( flags & GNTMAP_contains_pte )
    {
        if ( !new_addr )
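For an external-paging (HVM) domain, create_grant_host_mapping() is therefore a pure p2m operation, and the only flag combinations it accepts are GNTMAP_host_map with or without GNTMAP_readonly, and no cache flags. The acceptance test restated on its own, with the GNTMAP_* values transcribed from the public headers and a hypothetical helper name:

#include <stdbool.h>

#define GNTMAP_device_map       (1 << 0)
#define GNTMAP_host_map         (1 << 1)
#define GNTMAP_readonly         (1 << 2)
#define GNTMAP_application_map  (1 << 3)
#define GNTMAP_contains_pte     (1 << 4)

/* Would this (flags, cache_flags) pair get past the checks in
 * create_grant_p2m_mapping()? */
static bool p2m_grant_flags_ok(unsigned int flags, unsigned int cache_flags)
{
    if ( cache_flags != 0 )     /* no special cacheability for p2m maps */
        return false;
    /* Masking off GNTMAP_readonly must leave exactly GNTMAP_host_map,
     * which rejects device, application and contains_pte mappings. */
    return (flags & ~GNTMAP_readonly) == GNTMAP_host_map;
}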
diff -r 8ee92b9f890f xen/arch/x86/mm/hap/p2m-ept.c
--- a/xen/arch/x86/mm/hap/p2m-ept.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/mm/hap/p2m-ept.c	Mon Jul 13 12:00:26 2009 +0100
@@ -39,10 +39,12 @@
             return;
         case p2m_ram_rw:
         case p2m_mmio_direct:
+        case p2m_grant_map_rw:
             entry->r = entry->w = entry->x = 1;
             return;
         case p2m_ram_logdirty:
         case p2m_ram_ro:
+        case p2m_grant_map_ro:
             entry->r = entry->x = 1;
             entry->w = 0;
             return;
diff -r 8ee92b9f890f xen/arch/x86/mm/p2m.c
--- a/xen/arch/x86/mm/p2m.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/mm/p2m.c	Mon Jul 13 12:00:26 2009 +0100
@@ -102,17 +102,27 @@
 static unsigned long p2m_type_to_flags(p2m_type_t t)
 {
-    unsigned long flags = (t & 0x7UL) << 9;
+    unsigned long flags;
+#ifdef __x86_64__
+    flags = (unsigned long)(t & 0x3fff) << 9;
+#else
+    flags = (t & 0x7UL) << 9;
+#endif
+#ifndef HAVE_GRANT_MAP_P2M
+    BUG_ON(p2m_is_grant(t));
+#endif
     switch(t)
     {
     case p2m_invalid:
     default:
         return flags;
     case p2m_ram_rw:
+    case p2m_grant_map_rw:
         return flags | P2M_BASE_FLAGS | _PAGE_RW;
     case p2m_ram_logdirty:
         return flags | P2M_BASE_FLAGS;
     case p2m_ram_ro:
+    case p2m_grant_map_ro:
         return flags | P2M_BASE_FLAGS;
     case p2m_mmio_dm:
         return flags;
@@ -1321,7 +1331,7 @@
     unmap_domain_page(l1e);
 
     ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
-    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
+    return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : _mfn(INVALID_MFN);
 }
 
 /* Read the current domain's p2m table (through the linear mapping). */
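The widened type field above is what makes room for the two new grant types: on x86_64 the p2m type now occupies 14 available PTE bits starting at bit 9 instead of 3, and p2m_flags_to_type() in p2m.h (changed at the end of this patch) decodes the same field. A self-contained round-trip check, a minimal sketch under those assumptions:

#include <assert.h>

#define P2M_TYPE_SHIFT 9
#define P2M_TYPE_MASK  0x3fffUL   /* x86_64 encoding; 0x7UL on 32-bit */

static unsigned long type_to_flags(unsigned long t)
{
    return (t & P2M_TYPE_MASK) << P2M_TYPE_SHIFT;     /* p2m_type_to_flags() */
}

static unsigned long flags_to_type(unsigned long flags)
{
    return (flags >> P2M_TYPE_SHIFT) & P2M_TYPE_MASK; /* p2m_flags_to_type() */
}

int main(void)
{
    /* p2m_grant_map_ro == 8 needs a fourth type bit, hence the wider mask. */
    unsigned long t = 8;
    assert(flags_to_type(type_to_flags(t)) == t);
    return 0;
}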
@@ -1438,7 +1448,7 @@
         }
     }
 
-    if ( p2m_is_valid(p2mt) )
+    if ( p2m_is_valid(p2mt) || p2m_is_grant(p2mt) )
         mfn = _mfn(l1e_get_pfn(l1e));
     else
         /* XXX see above */
@@ -1790,18 +1800,21 @@
 
         for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
         {
+            p2m_type_t type;
+
+            type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
             if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
             {
-                if ( p2m_flags_to_type(l1e_get_flags(l1e[i1]))
-                     == p2m_populate_on_demand )
-                    entry_count++;
+                if ( type == p2m_populate_on_demand )
+                    entry_count++;
                 continue;
             }
             mfn = l1e_get_pfn(l1e[i1]);
             ASSERT(mfn_valid(_mfn(mfn)));
             m2pfn = get_gpfn_from_mfn(mfn);
             if ( m2pfn != gfn &&
-                 p2m_flags_to_type(l1e_get_flags(l1e[i1])) != p2m_mmio_direct )
+                 type != p2m_mmio_direct &&
+                 !p2m_is_grant(type) )
             {
                 pmbad++;
                 printk("mismatch: gfn %#lx -> mfn %#lx"
@@ -1854,6 +1867,8 @@
                           unsigned int page_order)
 {
     unsigned long i;
+    mfn_t mfn_return;
+    p2m_type_t t;
 
     if ( !paging_mode_translate(d) )
     {
@@ -1865,9 +1880,14 @@
 
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
+    for ( i = 0; i < (1UL << page_order); i++ )
+    {
+        mfn_return = p2m_gfn_to_mfn(d, gfn + i, &t, p2m_query);
+        if ( !p2m_is_grant(t) )
+            set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
+        ASSERT( !p2m_is_valid(t) || mfn + i == mfn_x(mfn_return) );
+    }
     set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order, p2m_invalid);
-    for ( i = 0; i < (1UL << page_order); i++ )
-        set_gpfn_from_mfn(mfn+i, INVALID_M2P_ENTRY);
 }
 
 void
@@ -2003,7 +2023,14 @@
     for ( i = 0; i < (1UL << page_order); i++ )
     {
         omfn = gfn_to_mfn_query(d, gfn + i, &ot);
-        if ( p2m_is_ram(ot) )
+        if ( p2m_is_grant(ot) )
+        {
+            /* Really shouldn't be unmapping grant maps this way */
+            domain_crash(d);
+            p2m_unlock(d->arch.p2m);
+            return -EINVAL;
+        }
+        else if ( p2m_is_ram(ot) )
         {
             ASSERT(mfn_valid(omfn));
             set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
@@ -2018,6 +2045,8 @@
     /* Then, look for m->p mappings for this range and deal with them */
     for ( i = 0; i < (1UL << page_order); i++ )
     {
+        if ( page_get_owner(mfn_to_page(_mfn(mfn + i))) != d )
+            continue;
         ogfn = mfn_to_gfn(d, _mfn(mfn+i));
         if (
 #ifdef __x86_64__
@@ -2033,6 +2062,9 @@
             P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                       mfn + i, ogfn, gfn + i);
             omfn = gfn_to_mfn_query(d, ogfn, &ot);
+            /* If we get here, we know the local domain owns the page,
+               so it can't have been grant mapped in. */
+            BUG_ON( p2m_is_grant(ot) );
             if ( p2m_is_ram(ot) )
             {
                 ASSERT(mfn_valid(omfn));
@@ -2049,8 +2081,11 @@
     {
         if ( !set_p2m_entry(d, gfn, _mfn(mfn), page_order, t) )
             rc = -EINVAL;
-        for ( i = 0; i < (1UL << page_order); i++ )
-            set_gpfn_from_mfn(mfn+i, gfn+i);
+        if ( !p2m_is_grant(t) )
+        {
+            for ( i = 0; i < (1UL << page_order); i++ )
+                set_gpfn_from_mfn(mfn+i, gfn+i);
+        }
     }
     else
     {
@@ -2089,6 +2124,8 @@
     l4_pgentry_t *l4e;
     int i4;
 #endif /* CONFIG_PAGING_LEVELS == 4 */
+
+    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
 
     if ( !paging_mode_translate(d) )
         return;
@@ -2185,6 +2222,8 @@
     p2m_type_t pt;
     mfn_t mfn;
 
+    BUG_ON(p2m_is_grant(ot) || p2m_is_grant(nt));
+
     p2m_lock(d->arch.p2m);
 
     mfn = gfn_to_mfn(d, gfn, &pt);
@@ -2207,7 +2246,12 @@
         return 0;
 
     omfn = gfn_to_mfn_query(d, gfn, &ot);
-    if ( p2m_is_ram(ot) )
+    if ( p2m_is_grant(ot) )
+    {
+        domain_crash(d);
+        return 0;
+    }
+    else if ( p2m_is_ram(ot) )
     {
         ASSERT(mfn_valid(omfn));
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
diff -r 8ee92b9f890f xen/arch/x86/mm/paging.c
--- a/xen/arch/x86/mm/paging.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/mm/paging.c	Mon Jul 13 12:00:26 2009 +0100
@@ -272,7 +272,8 @@
 
     gmfn = _mfn(guest_mfn);
 
-    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) )
+    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) ||
+         page_get_owner(mfn_to_page(gmfn)) != d )
        return;
 
     log_dirty_lock(d);
diff -r 8ee92b9f890f xen/arch/x86/mm/shadow/common.c
--- a/xen/arch/x86/mm/shadow/common.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/mm/shadow/common.c	Mon Jul 13 12:00:26 2009 +0100
@@ -172,10 +172,12 @@
         return X86EMUL_OKAY;
     case HVMCOPY_bad_gva_to_gfn:
         return X86EMUL_EXCEPTION;
-    default:
-        break;
+    case HVMCOPY_bad_gfn_to_mfn:
+    case HVMCOPY_unemulatable:
+        return X86EMUL_UNHANDLEABLE;
     }
 
+    BUG();
     return X86EMUL_UNHANDLEABLE;
 }
 
@@ -3431,7 +3433,7 @@
     {
         mfn_t mfn = _mfn(l1e_get_pfn(*p));
         p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
-        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
+        if ( (p2m_is_valid(p2mt) || p2m_is_grant(p2mt)) && mfn_valid(mfn) )
         {
             sh_remove_all_shadows_and_parents(v, mfn);
             if ( sh_remove_all_mappings(v, mfn) )
diff -r 8ee92b9f890f xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c	Mon Jul 13 12:00:26 2009 +0100
@@ -484,7 +484,7 @@
     ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
 
     /* Check there's something for the shadows to map to */
-    if ( !p2m_is_valid(p2mt) )
+    if ( !p2m_is_valid(p2mt) && !p2m_is_grant(p2mt) )
     {
         *sp = shadow_l1e_empty();
         goto done;
@@ -630,7 +630,7 @@
     }
 
     /* Read-only memory */
-    if ( p2mt == p2m_ram_ro )
+    if ( p2m_is_readonly(p2mt) )
         sflags &= ~_PAGE_RW;
 
     // protect guest page tables
@@ -807,8 +807,10 @@
     return ((of | (of ^ nf)) == nf);
 }
 
+/* type is only used to distinguish grant map pages from ordinary RAM
+ * i.e. non-p2m_is_grant() pages are treated as p2m_ram_rw. */
 static int inline
-shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d)
+shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
 {
     int res;
     mfn_t mfn;
@@ -836,6 +838,20 @@
                        "which is owned by domain %d: %s\n",
                        d->domain_id, mfn_x(mfn), owner->domain_id,
                        res ? "success" : "failed");
+    }
+
+    /* Okay, it might still be a grant mapping PTE.  Try it. */
+    if ( unlikely(!res) &&
+         (type == p2m_grant_map_rw ||
+          (type == p2m_grant_map_ro &&
+           !(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) )
+    {
+        /* It's a grant mapping.  The grant table implementation will
+           already have checked that we're supposed to have access, so
+           we can just grab a reference directly. */
+        mfn = shadow_l1e_get_mfn(sl1e);
+        if ( mfn_valid(mfn) )
+            res = get_page_from_l1e(sl1e, d, page_get_owner(mfn_to_page(mfn)));
     }
 
     if ( unlikely(!res) )
@@ -1133,6 +1149,7 @@
 static int shadow_set_l1e(struct vcpu *v,
                           shadow_l1e_t *sl1e,
                           shadow_l1e_t new_sl1e,
+                          p2m_type_t new_type,
                           mfn_t sl1mfn)
 {
     int flags = 0;
@@ -1160,7 +1177,7 @@
         /* About to install a new reference */
         if ( shadow_mode_refcounts(d) ) {
             TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
-            if ( shadow_get_page_from_l1e(new_sl1e, d) == 0 )
+            if ( shadow_get_page_from_l1e(new_sl1e, d, new_type) == 0 )
             {
                 /* Doesn't look like a pagetable. */
                 flags |= SHADOW_SET_ERROR;
@@ -2377,7 +2394,7 @@
         gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
         l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
-        result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
+        result |= shadow_set_l1e(v, sl1p, new_sl1e, p2mt, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
@@ -2436,8 +2453,8 @@
         gfn = guest_l1e_get_gfn(gl1e);
         gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
         l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt);
-        rc |= shadow_set_l1e(v, sl1p, nsl1e, sl1mfn);
-
+        rc |= shadow_set_l1e(v, sl1p, nsl1e, p2mt, sl1mfn);
+
         *snpl1p = gl1e;
     }
     });
@@ -2754,7 +2771,7 @@
         /* Propagate the entry.  */
         l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
-        (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
+        (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, p2mt, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
         if ( snpl1p != NULL )
@@ -3109,7 +3126,8 @@
     gmfn = gfn_to_mfn_guest(d, gfn, &p2mt);
 
     if ( shadow_mode_refcounts(d) &&
-         (!p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn))) )
+         ((!p2m_is_valid(p2mt) && !p2m_is_grant(p2mt)) ||
+          (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn))) )
     {
         perfc_incr(shadow_fault_bail_bad_gfn);
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
@@ -3207,7 +3225,7 @@
 
     /* Calculate the shadow entry and write it */
     l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
-    r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
+    r = shadow_set_l1e(v, ptr_sl1e, sl1e, p2mt, sl1mfn);
 
 #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
     if ( mfn_valid(gw.l1mfn)
@@ -3260,7 +3278,7 @@
     }
 
     /* Ignore attempts to write to read-only memory. */
-    if ( (p2mt == p2m_ram_ro) && (ft == ft_demand_write) )
+    if ( p2m_is_readonly(p2mt) && (ft == ft_demand_write) )
     {
         static unsigned long lastpage;
         if ( xchg(&lastpage, va & PAGE_MASK) != (va & PAGE_MASK) )
@@ -3603,7 +3621,8 @@
         shadow_l1e_t *sl1;
         sl1 = sh_linear_l1_table(v) + shadow_l1_linear_offset(va);
         /* Remove the shadow entry that maps this VA */
-        (void) shadow_set_l1e(v, sl1, shadow_l1e_empty(), sl1mfn);
+        (void) shadow_set_l1e(v, sl1, shadow_l1e_empty(),
+                              p2m_invalid, sl1mfn);
     }
     shadow_unlock(v->domain);
     /* Need the invlpg, to pick up the disappeareance of the sl1e */
@@ -4318,7 +4337,7 @@
 
         /* Found it!  Need to remove its write permissions. */
         sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
-        r = shadow_set_l1e(v, sl1p, sl1e, smfn);
+        r = shadow_set_l1e(v, sl1p, sl1e, p2m_ram_rw, smfn);
         ASSERT( !(r & SHADOW_SET_ERROR) );
 
         sh_unmap_domain_page(sl1p);
@@ -4372,8 +4391,12 @@
         /* Found it!  Need to remove its write permissions. */
         sl1mfn = shadow_l2e_get_mfn(*sl2p);
         sl1e = shadow_l1e_remove_flags(sl1e, _PAGE_RW);
-        r = shadow_set_l1e(v, sl1p, sl1e, sl1mfn);
-        ASSERT( !(r & SHADOW_SET_ERROR) );
+        r = shadow_set_l1e(v, sl1p, sl1e, p2m_ram_rw, sl1mfn);
+        if ( r & SHADOW_SET_ERROR ) {
+            /* Can only currently happen if we found a grant-mapped
+             * page.  Just make the guess fail. */
+            return 0;
+        }
         TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_WRMAP_GUESS_FOUND);
         return 1;
     }
@@ -4398,7 +4421,7 @@
             && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(readonly_mfn)) )
         {
             shadow_l1e_t ro_sl1e = shadow_l1e_remove_flags(*sl1e, _PAGE_RW);
-            (void) shadow_set_l1e(v, sl1e, ro_sl1e, sl1mfn);
+            (void) shadow_set_l1e(v, sl1e, ro_sl1e, p2m_ram_rw, sl1mfn);
 #if SHADOW_OPTIMIZATIONS & SHOPT_WRITABLE_HEURISTIC
             /* Remember the last shadow that we shot a writeable mapping in */
             v->arch.paging.shadow.last_writeable_pte_smfn = mfn_x(base_sl1mfn);
@@ -4426,7 +4449,8 @@
         if ( (flags & _PAGE_PRESENT)
              && (mfn_x(shadow_l1e_get_mfn(*sl1e)) == mfn_x(target_mfn)) )
         {
-            (void) shadow_set_l1e(v, sl1e, shadow_l1e_empty(), sl1mfn);
+            (void) shadow_set_l1e(v, sl1e, shadow_l1e_empty(),
+                                  p2m_invalid, sl1mfn);
             if ( (mfn_to_page(target_mfn)->count_info & PGC_count_mask) == 0 )
                 /* This breaks us cleanly out of the FOREACH macro */
                 done = 1;
@@ -4444,17 +4468,21 @@
     switch ( mfn_to_page(smfn)->u.sh.type )
     {
     case SH_type_l1_shadow:
-        (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
+        (void) shadow_set_l1e(v, ep, shadow_l1e_empty(), p2m_invalid, smfn);
+        break;
     case SH_type_l2_shadow:
 #if GUEST_PAGING_LEVELS >= 3
     case SH_type_l2h_shadow:
 #endif
-        (void) shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn); break;
+        (void) shadow_set_l2e(v, ep, shadow_l2e_empty(), smfn);
+        break;
 #if GUEST_PAGING_LEVELS >= 4
     case SH_type_l3_shadow:
-        (void) shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn); break;
+        (void) shadow_set_l3e(v, ep, shadow_l3e_empty(), smfn);
+        break;
     case SH_type_l4_shadow:
-        (void) shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn); break;
+        (void) shadow_set_l4e(v, ep, shadow_l4e_empty(), smfn);
+        break;
 #endif
     default: BUG(); /* Called with the wrong kind of shadow. */
     }
@@ -4562,7 +4590,7 @@
     else
         mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
 
-    if ( p2mt == p2m_ram_ro )
+    if ( p2m_is_readonly(p2mt) )
         return _mfn(READONLY_GFN);
     if ( !p2m_is_ram(p2mt) )
         return _mfn(BAD_GFN_TO_MFN);
@@ -4966,7 +4994,7 @@
             gfn = guest_l1e_get_gfn(*gl1e);
             mfn = shadow_l1e_get_mfn(*sl1e);
             gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
-            if ( mfn_x(gmfn) != mfn_x(mfn) )
+            if ( !p2m_is_grant(p2mt) && mfn_x(gmfn) != mfn_x(mfn) )
                 AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
                            " --> %" PRI_mfn " != mfn %" PRI_mfn,
                            gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn));
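The extra p2m_type_t argument threaded through shadow_set_l1e() exists so that shadow_get_page_from_l1e() can decide whether a reference may be taken on a foreign, grant-mapped frame. The permission rule it applies, restated as a self-contained sketch (the helper name and transcribed constants are illustrative, not from the patch):

#include <stdbool.h>

#define _PAGE_RW 0x002UL
enum { p2m_grant_map_rw = 7, p2m_grant_map_ro = 8 };  /* from p2m.h below */

/* May a shadow l1e with the given flags be installed over a frame of
 * this p2m type, bypassing the usual ownership check? */
static bool grant_sl1e_permitted(int type, unsigned long sl1e_flags)
{
    if ( type == p2m_grant_map_rw )
        return true;                       /* the grant allows writes */
    if ( type == p2m_grant_map_ro )
        return !(sl1e_flags & _PAGE_RW);   /* shadow must stay read-only */
    return false;  /* not a grant: normal refcounting rules apply */
}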
diff -r 8ee92b9f890f xen/common/grant_table.c
--- a/xen/common/grant_table.c	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/common/grant_table.c	Mon Jul 13 12:00:26 2009 +0100
@@ -226,6 +226,15 @@
         return;
     }
 
+    if ( unlikely(paging_mode_external(ld) &&
+                  (op->flags & (GNTMAP_device_map|GNTMAP_application_map|
+                                GNTMAP_contains_pte))) )
+    {
+        gdprintk(XENLOG_INFO, "No device mapping in HVM domain.\n");
+        op->status = GNTST_general_error;
+        return;
+    }
+
     if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) )
     {
         gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom);
@@ -343,6 +352,13 @@
             if ( mfn_valid(frame) )
                 put_page(mfn_to_page(frame));
 
+            if ( paging_mode_external(ld) )
+            {
+                gdprintk(XENLOG_WARNING, "HVM guests can't grant map iomem\n");
+                rc = GNTST_general_error;
+                goto undo_out;
+            }
+
             if ( !iomem_access_permitted(rd, frame, frame) )
             {
                 gdprintk(XENLOG_WARNING,
@@ -395,7 +411,12 @@
          !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
          (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
     {
-        if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) )
+        /* Shouldn't happen, because you can't use iommu in a HVM
+         * domain. */
+        BUG_ON(paging_mode_translate(ld));
+        /* We're not translated, so we know that gmfns and mfns are
+           the same things, so the IOMMU entry is always 1-to-1. */
+        if ( iommu_map_page(ld, frame, frame) )
         {
             rc = GNTST_general_error;
             goto undo_out;
@@ -573,7 +594,8 @@
          (old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
          !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
     {
-        if ( iommu_unmap_page(ld, mfn_to_gmfn(ld, op->frame)) )
+        BUG_ON(paging_mode_translate(ld));
+        if ( iommu_unmap_page(ld, op->frame) )
         {
             rc = GNTST_general_error;
             goto unmap_out;
@@ -1717,7 +1739,7 @@
             {
                 BUG_ON(!(act->pin & GNTPIN_hstr_mask));
                 act->pin -= GNTPIN_hstr_inc;
-                if ( gnttab_release_host_mappings &&
+                if ( gnttab_release_host_mappings(d) &&
                      !is_iomem_page(act->frame) )
                     put_page(mfn_to_page(act->frame));
             }
@@ -1736,7 +1758,7 @@
             {
                 BUG_ON(!(act->pin & GNTPIN_hstw_mask));
                 act->pin -= GNTPIN_hstw_inc;
-                if ( gnttab_release_host_mappings &&
+                if ( gnttab_release_host_mappings(d) &&
                      !is_iomem_page(act->frame) )
                 {
                     if ( gnttab_host_mapping_get_page_type(map, d, rd) )
diff -r 8ee92b9f890f xen/include/asm-ia64/grant_table.h
--- a/xen/include/asm-ia64/grant_table.h	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/include/asm-ia64/grant_table.h	Mon Jul 13 12:00:26 2009 +0100
@@ -68,7 +68,7 @@
 #define gnttab_host_mapping_get_page_type(op, ld, rd)   \
     (!((op)->flags & GNTMAP_readonly))
 
-#define gnttab_release_host_mappings 1
+#define gnttab_release_host_mappings(domain) 1
 
 static inline int replace_grant_supported(void)
 {
diff -r 8ee92b9f890f xen/include/asm-x86/grant_table.h
--- a/xen/include/asm-x86/grant_table.h	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/include/asm-x86/grant_table.h	Mon Jul 13 12:00:26 2009 +0100
@@ -44,7 +44,7 @@
     (((ld) == (rd)) || !paging_mode_external(rd)))
 
 /* Done implicitly when page tables are destroyed. */
-#define gnttab_release_host_mappings 0
+#define gnttab_release_host_mappings(domain) ( paging_mode_external(domain) )
 
 static inline int replace_grant_supported(void)
 {
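gnttab_release_host_mappings has become a per-domain predicate because HVM grant mappings live in the p2m rather than in page tables that are torn down anyway, so on x86 the grant table code must drop those references itself for external-paging domains. From the guest's side, a mapping made earlier is torn down with a plain unmap; a sketch under the same assumptions as the map example above (note that replace_grant_p2m_mapping() only supports new_addr == 0, i.e. no atomic remap):

/* Sketch: undo map_remote_grant().  'handle' is the value returned by
 * the map call and 'gpfn' the same guest frame that was used there. */
static int unmap_remote_grant(grant_handle_t handle, unsigned long gpfn)
{
    struct gnttab_unmap_grant_ref op;

    op.host_addr    = (uint64_t)gpfn << PAGE_SHIFT;
    op.dev_bus_addr = 0;    /* no device mapping was made */
    op.handle       = handle;

    if ( HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1) != 0 )
        return -1;
    return (op.status == GNTST_okay) ? 0 : op.status;
}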
diff -r 8ee92b9f890f xen/include/asm-x86/hvm/domain.h
--- a/xen/include/asm-x86/hvm/domain.h	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/include/asm-x86/hvm/domain.h	Mon Jul 13 12:00:26 2009 +0100
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
diff -r 8ee92b9f890f xen/include/asm-x86/hvm/support.h
--- a/xen/include/asm-x86/hvm/support.h	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/include/asm-x86/hvm/support.h	Mon Jul 13 12:00:26 2009 +0100
@@ -71,7 +71,8 @@
 enum hvm_copy_result {
     HVMCOPY_okay = 0,
     HVMCOPY_bad_gva_to_gfn,
-    HVMCOPY_bad_gfn_to_mfn
+    HVMCOPY_bad_gfn_to_mfn,
+    HVMCOPY_unemulatable
 };
 
 /*
diff -r 8ee92b9f890f xen/include/asm-x86/p2m.h
--- a/xen/include/asm-x86/p2m.h	Fri Jul 10 18:12:13 2009 +0100
+++ b/xen/include/asm-x86/p2m.h	Mon Jul 13 12:00:26 2009 +0100
@@ -45,6 +45,10 @@
  */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
+#ifdef __x86_64__
+#define HAVE_GRANT_MAP_P2M
+#endif
+
 /*
  * The upper levels of the p2m pagetable always contain full rights; all
  * variation in the access control bits is made in the level-1 PTEs.
@@ -65,6 +69,12 @@
     p2m_mmio_dm = 4,            /* Reads and write go to the device model */
     p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
     p2m_populate_on_demand = 6, /* Place-holder for empty memory */
+
+    /* Note that these can only be used if HAVE_GRANT_MAP_P2M is
+       defined.  They get defined anyway so as to avoid lots of
+       #ifdef's everywhere else. */
+    p2m_grant_map_rw = 7,       /* Read/write grant mapping */
+    p2m_grant_map_ro = 8,       /* Read-only grant mapping */
 } p2m_type_t;
 
 typedef enum {
@@ -81,13 +91,19 @@
                        | p2m_to_mask(p2m_ram_logdirty)  \
                        | p2m_to_mask(p2m_ram_ro))
 
+/* Grant mapping types, which map to a real machine frame in another
+ * VM */
+#define P2M_GRANT_TYPES (p2m_to_mask(p2m_grant_map_rw)  \
+                         | p2m_to_mask(p2m_grant_map_ro) )
+
 /* MMIO types, which don't have to map to anything in the frametable */
 #define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
                         | p2m_to_mask(p2m_mmio_direct))
 
 /* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
 #define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
-                      | p2m_to_mask(p2m_ram_ro))
+                      | p2m_to_mask(p2m_ram_ro)         \
+                      | p2m_to_mask(p2m_grant_map_ro) )
 
 #define P2M_MAGIC_TYPES (p2m_to_mask(p2m_populate_on_demand))
 
@@ -96,6 +112,10 @@
 #define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
 #define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
 #define p2m_is_magic(_t) (p2m_to_mask(_t) & P2M_MAGIC_TYPES)
+#define p2m_is_grant(_t) (p2m_to_mask(_t) & P2M_GRANT_TYPES)
+/* Grant types are *not* considered valid, because they can be
+   unmapped at any time and, unless you happen to be the shadow or p2m
+   implementations, there's no way of synchronising against that. */
 #define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
 
 /* Populate-on-demand */
@@ -161,8 +181,12 @@
 /* Extract the type from the PTE flags that store it */
 static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
 {
-    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    /* Type is stored in the "available" bits */
+#ifdef __x86_64__
+    return (flags >> 9) & 0x3fff;
+#else
     return (flags >> 9) & 0x7;
+#endif
 }
 
 /* Read the current domain's p2m table.  Do not populate PoD pages. */
@@ -225,17 +249,6 @@
     else
         return mfn_x(mfn);
 }
-
-/* Translate the frame number held in an l1e from guest to machine */
-static inline l1_pgentry_t
-gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
-{
-    if ( unlikely(paging_mode_translate(d)) )
-        l1e = l1e_from_pfn(gmfn_to_mfn(d, l1e_get_pfn(l1e)),
-                           l1e_get_flags(l1e));
-    return l1e;
-}
-
 /* Init the datastructures for later use by the p2m code */
 int p2m_init(struct domain *d);
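The predicate relationships set up in this header are easy to check in isolation: a read-only grant counts as readonly and as a grant, but no grant type ever counts as RAM or as "valid". A minimal standalone check, with the masks transcribed from the definitions above:

#include <assert.h>

#define p2m_to_mask(t) (1UL << (t))
enum { p2m_ram_rw = 1, p2m_ram_logdirty = 2, p2m_ram_ro = 3,
       p2m_grant_map_rw = 7, p2m_grant_map_ro = 8 };

#define P2M_RAM_TYPES   (p2m_to_mask(p2m_ram_rw) |        \
                         p2m_to_mask(p2m_ram_logdirty) |  \
                         p2m_to_mask(p2m_ram_ro))
#define P2M_GRANT_TYPES (p2m_to_mask(p2m_grant_map_rw) |  \
                         p2m_to_mask(p2m_grant_map_ro))
#define P2M_RO_TYPES    (p2m_to_mask(p2m_ram_logdirty) |  \
                         p2m_to_mask(p2m_ram_ro) |        \
                         p2m_to_mask(p2m_grant_map_ro))

int main(void)
{
    /* Read-only grants must be refused write access... */
    assert(P2M_RO_TYPES & p2m_to_mask(p2m_grant_map_ro));
    /* ...and no grant type is ever treated as guest RAM. */
    assert(!(P2M_RAM_TYPES & P2M_GRANT_TYPES));
    return 0;
}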