x86 guest pagetable walker: check for invalid bits in pagetable entries.
Some bits are reserved in x86 pagetable entries and must be zero; the
MMU should raise a pagefault if it sees any of them set, with bit 3
(the reserved-bit flag) set in the error code. Xen's software pagetable
walker hasn't been doing this, which has been OK because no guest OSes
actually have invalid bits set except in error cases where things are
already very wrong.
Xen's shadow pagetable code deliberately sets these bits as part of the
not-present-entry fast path, so if we're to support shadow-on-shadow
nested HVM, we need to start checking them.
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
diff -r 38ad3633ecaf xen/arch/x86/cpu/mtrr/main.c
--- a/xen/arch/x86/cpu/mtrr/main.c Wed Oct 13 12:01:30 2010 +0100
+++ b/xen/arch/x86/cpu/mtrr/main.c Thu Oct 14 15:40:46 2010 +0100
@@ -600,6 +600,8 @@ struct mtrr_value {
unsigned long lsize;
};
+unsigned int paddr_bits __read_mostly = 36;
+
/**
* mtrr_bp_init - initialize mtrrs on the boot CPU
*
@@ -620,17 +622,16 @@ void __init mtrr_bp_init(void)
Intel will implement it to when they extend the address
bus of the Xeon. */
if (cpuid_eax(0x80000000) >= 0x80000008) {
- u32 phys_addr;
- phys_addr = cpuid_eax(0x80000008) & 0xff;
+ paddr_bits = cpuid_eax(0x80000008) & 0xff;
/* CPUID workaround for Intel 0F33/0F34 CPU */
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
boot_cpu_data.x86 == 0xF &&
boot_cpu_data.x86_model == 0x3 &&
(boot_cpu_data.x86_mask == 0x3 ||
boot_cpu_data.x86_mask == 0x4))
- phys_addr = 36;
+ paddr_bits = 36;
- size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) -
1);
+ size_or_mask = ~((1ULL << (paddr_bits - PAGE_SHIFT)) -
1);
size_and_mask = ~size_or_mask & 0xfffff00000ULL;
} else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
boot_cpu_data.x86 == 6) {
diff -r 38ad3633ecaf xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c Wed Oct 13 12:01:30 2010 +0100
+++ b/xen/arch/x86/mm/guest_walk.c Thu Oct 14 15:40:46 2010 +0100
@@ -63,7 +63,7 @@ static uint32_t mandatory_flags(struct v
&& !(pfec & PFEC_user_mode) )
pfec &= ~PFEC_write_access;
- return flags[(pfec & 0x1f) >> 1];
+ return flags[(pfec & 0x1f) >> 1] | _PAGE_INVALID_BITS;
}
/* Modify a guest pagetable entry to set the Accessed and Dirty bits.
@@ -131,17 +131,19 @@ guest_walk_tables(struct vcpu *v, struct
guest_l3e_t *l3p = NULL;
guest_l4e_t *l4p;
#endif
- uint32_t gflags, mflags, rc = 0;
+ uint32_t gflags, mflags, iflags, rc = 0;
int pse;
perfc_incr(guest_walk);
memset(gw, 0, sizeof(*gw));
gw->va = va;
- /* Mandatory bits that must be set in every entry. We invert NX, to
- * calculate as if there were an "X" bit that allowed access.
- * We will accumulate, in rc, the set of flags that are missing. */
+ /* Mandatory bits that must be set in every entry. We invert NX and
+ * the invalid bits, to calculate as if there were an "X" bit that
+ * allowed access. We will accumulate, in rc, the set of flags that
+ * are missing/unwanted. */
mflags = mandatory_flags(v, pfec);
+ iflags = (_PAGE_NX_BIT | _PAGE_INVALID_BITS);
#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
@@ -150,7 +152,7 @@ guest_walk_tables(struct vcpu *v, struct
gw->l4mfn = top_mfn;
l4p = (guest_l4e_t *) top_map;
gw->l4e = l4p[guest_l4_table_offset(va)];
- gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
+ gflags = guest_l4e_get_flags(gw->l4e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
if ( rc & _PAGE_PRESENT ) goto out;
@@ -164,7 +166,7 @@ guest_walk_tables(struct vcpu *v, struct
goto out;
/* Get the l3e and check its flags*/
gw->l3e = l3p[guest_l3_table_offset(va)];
- gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
+ gflags = guest_l3e_get_flags(gw->l3e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
if ( rc & _PAGE_PRESENT )
goto out;
@@ -201,7 +203,7 @@ guest_walk_tables(struct vcpu *v, struct
#endif /* All levels... */
- gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
+ gflags = guest_l2e_get_flags(gw->l2e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
if ( rc & _PAGE_PRESENT )
goto out;
@@ -246,7 +248,7 @@ guest_walk_tables(struct vcpu *v, struct
if(l1p == NULL)
goto out;
gw->l1e = l1p[guest_l1_table_offset(va)];
- gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
+ gflags = guest_l1e_get_flags(gw->l1e) ^ iflags;
rc |= ((gflags & mflags) ^ mflags);
}
diff -r 38ad3633ecaf xen/arch/x86/mm/hap/guest_walk.c
--- a/xen/arch/x86/mm/hap/guest_walk.c Wed Oct 13 12:01:30 2010 +0100
+++ b/xen/arch/x86/mm/hap/guest_walk.c Thu Oct 14 15:40:46 2010 +0100
@@ -99,6 +99,9 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
if ( missing & _PAGE_PRESENT )
pfec[0] &= ~PFEC_page_present;
+ if ( missing & _PAGE_INVALID_BITS )
+ pfec[0] |= PFEC_reserved_bit;
+
if ( missing & _PAGE_PAGED )
pfec[0] = PFEC_page_paged;
diff -r 38ad3633ecaf xen/arch/x86/mm/shadow/multi.c
--- a/xen/arch/x86/mm/shadow/multi.c Wed Oct 13 12:01:30 2010 +0100
+++ b/xen/arch/x86/mm/shadow/multi.c Thu Oct 14 15:40:46 2010 +0100
@@ -3181,6 +3181,8 @@ static int sh_page_fault(struct vcpu *v,
perfc_incr(shadow_fault_bail_real_fault);
SHADOW_PRINTK("not a shadow fault\n");
reset_early_unshadow(v);
+ if ( (rc & _PAGE_INVALID_BITS) )
+ regs->error_code |= PFEC_reserved_bit;
goto propagate;
}
@@ -3772,6 +3774,7 @@ sh_gva_to_gfn(struct vcpu *v, unsigned l
{
walk_t gw;
gfn_t gfn;
+ uint32_t missing;
#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
/* Check the vTLB cache first */
@@ -3780,10 +3783,12 @@ sh_gva_to_gfn(struct vcpu *v, unsigned l
return vtlb_gfn;
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
- if ( sh_walk_guest_tables(v, va, &gw, pfec[0]) != 0 )
- {
- if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
+ if ( (missing = sh_walk_guest_tables(v, va, &gw, pfec[0])) != 0 )
+ {
+ if ( (missing & _PAGE_PRESENT) )
pfec[0] &= ~PFEC_page_present;
+ if ( missing & _PAGE_INVALID_BITS )
+ pfec[0] |= PFEC_reserved_bit;
return INVALID_GFN;
}
gfn = guest_walk_to_gfn(&gw);
diff -r 38ad3633ecaf xen/include/asm-x86/guest_pt.h
--- a/xen/include/asm-x86/guest_pt.h Wed Oct 13 12:01:30 2010 +0100
+++ b/xen/include/asm-x86/guest_pt.h Thu Oct 14 15:40:46 2010 +0100
@@ -204,6 +204,17 @@ guest_supports_nx(struct vcpu *v)
}
+/* Some bits are invalid in any pagetable entry. */
+#if GUEST_PAGING_LEVELS == 2
+#define _PAGE_INVALID_BITS (0)
+#elif GUEST_PAGING_LEVELS == 3
+#define _PAGE_INVALID_BITS \
+ get_pte_flags(((1ull<<63) - 1) & ~((1ull<<paddr_bits) - 1))
+#else /* GUEST_PAGING_LEVELS == 4 */
+#define _PAGE_INVALID_BITS \
+ get_pte_flags(((1ull<<52) - 1) & ~((1ull<<paddr_bits) - 1))
+#endif
+
/* Type used for recording a walk through guest pagetables. It is
* filled in by the pagetable walk function, and also used as a cache
diff -r 38ad3633ecaf xen/include/asm-x86/processor.h
--- a/xen/include/asm-x86/processor.h Wed Oct 13 12:01:30 2010 +0100
+++ b/xen/include/asm-x86/processor.h Thu Oct 14 15:40:46 2010 +0100
@@ -195,6 +195,9 @@ extern int phys_proc_id[NR_CPUS];
extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
extern int opt_cpu_info;
+
+/* Maximum width of physical addresses supported by the hardware */
+extern unsigned int paddr_bits;
extern void identify_cpu(struct cpuinfo_x86 *);
extern void setup_clear_cpu_cap(unsigned int);
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|