# HG changeset patch
# User Tim Deegan <Tim.Deegan@xxxxxxxxxx>
# Date 1311857109 -3600
# Node ID ba78ea7784c9eaed396cfe6c04fb8e62c9dd8407
# Parent fa4e2ca9ecffbc432b451f495ad0a403644a6be8
x86/mm: Handle 1GiB superpages in the pagetable walker.

This allows HAP guests to use 1GiB superpages. Shadow and PV guests
still can't use them without more support in shadow/* and mm.c.

Signed-off-by: Christoph Egger <Christoph.Egger@xxxxxxx>
Signed-off-by: Tim Deegan <Tim.Deegan@xxxxxxxxxx>
---
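Note (illustrative, not part of the patch): the core of the guest_walk.c
change below is the arithmetic that turns a 1GiB leaf mapping into a fake
4k l1 entry. A minimal standalone sketch of that gfn calculation follows,
using simplified SKETCH_* stand-ins for the real GUEST_L3_GFN_MASK and
PAGE_SHIFT macros rather than Xen's own definitions:

  #include <stdint.h>
  #include <stdio.h>

  #define SKETCH_PAGE_SHIFT  12
  /* Number of 4k frames covered by one 1GiB superpage: 512 * 512. */
  #define SKETCH_L3_GFN_MASK ((512u * 512u) - 1)

  /* Given the gfn held in a 1GiB leaf entry and the virtual address
   * being translated, return the gfn of the 4k frame the address hits,
   * mirroring how the walker builds its fake l1e. */
  static uint64_t gfn_in_1gb_superpage(uint64_t l3e_gfn, uint64_t va)
  {
      return (l3e_gfn & ~(uint64_t)SKETCH_L3_GFN_MASK) +
             ((va >> SKETCH_PAGE_SHIFT) & SKETCH_L3_GFN_MASK);
  }

  int main(void)
  {
      /* Leaf maps gfn 0x40000 (1GiB-aligned); the low 30 bits of the
       * address select the 4k frame within it: expect 0x52345. */
      printf("%#llx\n",
             (unsigned long long)gfn_in_1gb_superpage(0x40000, 0x12345678ULL));
      return 0;
  }
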
diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/arch/x86/hvm/hvm.c Thu Jul 28 13:45:09 2011 +0100
@@ -2385,6 +2385,7 @@
unsigned int *ecx, unsigned int *edx)
{
struct vcpu *v = current;
+ struct domain *d = v->domain;
unsigned int count = *ecx;
if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) )
@@ -2393,7 +2394,7 @@
if ( cpuid_hypervisor_leaves(input, count, eax, ebx, ecx, edx) )
return;
- domain_cpuid(v->domain, input, *ecx, eax, ebx, ecx, edx);
+ domain_cpuid(d, input, *ecx, eax, ebx, ecx, edx);
switch ( input )
{
@@ -2429,7 +2430,7 @@
{
if ( !(v->arch.xcr0 & (1ULL << sub_leaf)) )
continue;
- domain_cpuid(v->domain, input, sub_leaf, &_eax, &_ebx, &_ecx,
+ domain_cpuid(d, input, sub_leaf, &_eax, &_ebx, &_ecx,
&_edx);
if ( (_eax + _ebx) > *ebx )
*ebx = _eax + _ebx;
@@ -2440,9 +2441,12 @@
case 0x80000001:
/* We expose RDTSCP feature to guest only when
tsc_mode == TSC_MODE_DEFAULT and host_tsc_is_safe() returns 1 */
- if ( v->domain->arch.tsc_mode != TSC_MODE_DEFAULT ||
+ if ( d->arch.tsc_mode != TSC_MODE_DEFAULT ||
!host_tsc_is_safe() )
*edx &= ~cpufeat_mask(X86_FEATURE_RDTSCP);
+ /* Hide 1GB-superpage feature if we can't emulate it. */
+ if ( !hvm_pse1gb_supported(d) )
+ *edx &= ~cpufeat_mask(X86_FEATURE_PAGE1GB);
break;
}
}
diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/arch/x86/mm/guest_walk.c
--- a/xen/arch/x86/mm/guest_walk.c Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/arch/x86/mm/guest_walk.c Thu Jul 28 13:45:09 2011 +0100
@@ -134,7 +134,8 @@
guest_l4e_t *l4p;
#endif
uint32_t gflags, mflags, iflags, rc = 0;
- int pse, smep;
+ int smep;
+ bool_t pse1G = 0, pse2M = 0;
perfc_incr(guest_walk);
memset(gw, 0, sizeof(*gw));
@@ -181,6 +182,37 @@
rc |= ((gflags & mflags) ^ mflags);
if ( rc & _PAGE_PRESENT )
goto out;
+
+ pse1G = (gflags & _PAGE_PSE) && guest_supports_1G_superpages(v);
+
+ if ( pse1G )
+ {
+ /* Generate a fake l1 table entry so callers don't all
+ * have to understand superpages. */
+ gfn_t start = guest_l3e_get_gfn(gw->l3e);
+ /* Grant full access in the l1e, since all the guest entry's
+ * access controls are enforced in the l3e. */
+ int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+ _PAGE_ACCESSED|_PAGE_DIRTY);
+ /* Import cache-control bits. Note that _PAGE_PAT is actually
+ * _PAGE_PSE, and it is always set. We will clear it in case
+ * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */
+ flags |= (guest_l3e_get_flags(gw->l3e)
+ & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
+ if ( !(gfn_x(start) & 1) )
+ /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
+ flags &= ~_PAGE_PAT;
+
+ if ( gfn_x(start) & GUEST_L3_GFN_MASK & ~0x1 )
+ rc |= _PAGE_INVALID_BITS;
+
+ /* Increment the pfn by the right number of 4k pages. */
+ start = _gfn((gfn_x(start) & ~GUEST_L3_GFN_MASK) +
+ ((va >> PAGE_SHIFT) & GUEST_L3_GFN_MASK));
+ gw->l1e = guest_l1e_from_gfn(start, flags);
+ gw->l2mfn = gw->l1mfn = _mfn(INVALID_MFN);
+ goto set_ad;
+ }
#else /* PAE only... */
@@ -219,10 +251,9 @@
if ( rc & _PAGE_PRESENT )
goto out;
- pse = (guest_supports_superpages(v) &&
- (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE));
+ pse2M = (gflags & _PAGE_PSE) && guest_supports_superpages(v);
- if ( pse )
+ if ( pse2M )
{
/* Special case: this guest VA is in a PSE superpage, so there's
* no guest l1e. We make one up so that the propagation code
@@ -242,9 +273,7 @@
/* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
flags &= ~_PAGE_PAT;
-#define GUEST_L2_GFN_ALIGN (1 << (GUEST_L2_PAGETABLE_SHIFT - \
- GUEST_L1_PAGETABLE_SHIFT))
- if ( gfn_x(start) & (GUEST_L2_GFN_ALIGN - 1) & ~0x1 )
+ if ( gfn_x(start) & GUEST_L2_GFN_MASK & ~0x1 )
{
#if GUEST_PAGING_LEVELS == 2
/*
@@ -262,7 +291,7 @@
/* Increment the pfn by the right number of 4k pages.
* Mask out PAT and invalid bits. */
- start = _gfn((gfn_x(start) & ~(GUEST_L2_GFN_ALIGN - 1)) +
+ start = _gfn((gfn_x(start) & ~GUEST_L2_GFN_MASK) +
guest_l1_table_offset(va));
gw->l1e = guest_l1e_from_gfn(start, flags);
gw->l1mfn = _mfn(INVALID_MFN);
@@ -282,6 +311,9 @@
rc |= ((gflags & mflags) ^ mflags);
}
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+set_ad:
+#endif
/* Now re-invert the user-mode requirement for SMEP. */
if ( smep )
rc ^= _PAGE_USER;
@@ -295,17 +327,21 @@
#if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
paging_mark_dirty(d, mfn_x(gw->l4mfn));
- if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
+ if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e,
+ (pse1G && (pfec & PFEC_write_access))) )
paging_mark_dirty(d, mfn_x(gw->l3mfn));
#endif
- if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
- (pse && (pfec & PFEC_write_access))) )
- paging_mark_dirty(d, mfn_x(gw->l2mfn));
- if ( !pse )
+ if ( !pse1G )
{
- if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
- (pfec & PFEC_write_access)) )
- paging_mark_dirty(d, mfn_x(gw->l1mfn));
+ if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
+ (pse2M && (pfec & PFEC_write_access))) )
+ paging_mark_dirty(d, mfn_x(gw->l2mfn));
+ if ( !pse2M )
+ {
+ if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
+ (pfec & PFEC_write_access)) )
+ paging_mark_dirty(d, mfn_x(gw->l1mfn));
+ }
}
}
diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/include/asm-x86/guest_pt.h
--- a/xen/include/asm-x86/guest_pt.h Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/include/asm-x86/guest_pt.h Thu Jul 28 13:45:09 2011 +0100
@@ -177,6 +177,11 @@
#endif /* GUEST_PAGING_LEVELS != 2 */
+/* Mask of the GFNs covered by an L2 or L3 superpage */
+#define GUEST_L2_GFN_MASK (GUEST_L1_PAGETABLE_ENTRIES - 1)
+#define GUEST_L3_GFN_MASK \
+ ((GUEST_L2_PAGETABLE_ENTRIES * GUEST_L1_PAGETABLE_ENTRIES) - 1)
+
/* Which pagetable features are supported on this vcpu? */
@@ -194,6 +199,12 @@
}
static inline int
+guest_supports_1G_superpages(struct vcpu *v)
+{
+ return (GUEST_PAGING_LEVELS >= 4 && hvm_pse1gb_supported(v->domain));
+}
+
+static inline int
guest_supports_nx(struct vcpu *v)
{
if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
diff -r fa4e2ca9ecff -r ba78ea7784c9 xen/include/asm-x86/hvm/hvm.h
--- a/xen/include/asm-x86/hvm/hvm.h Tue Jul 26 18:37:32 2011 +0100
+++ b/xen/include/asm-x86/hvm/hvm.h Thu Jul 28 13:45:09 2011 +0100
@@ -219,11 +219,16 @@
#define hvm_nx_enabled(v) \
(!!((v)->arch.hvm_vcpu.guest_efer & EFER_NX))
+/* Can we use superpages in the HAP p2m table? */
#define hvm_hap_has_1gb(d) \
(hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_1GB)
#define hvm_hap_has_2mb(d) \
(hvm_funcs.hap_capabilities & HVM_HAP_SUPERPAGE_2MB)
+/* Can the guest use 1GB superpages in its own pagetables? */
+#define hvm_pse1gb_supported(d) \
+ (cpu_has_page1gb && paging_mode_hap(d))
+
#ifdef __x86_64__
#define hvm_long_mode_enabled(v) \
((v)->arch.hvm_vcpu.guest_efer & EFER_LMA)
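
A further note on the cache-attribute handling above: in a PSE leaf entry
bit 7 is _PAGE_PSE, so the PAT bit moves up to bit 12, which after the gfn
shift becomes bit 0 of the gfn (the walker also requires gfn bits 1..17 to
be zero, otherwise it reports _PAGE_INVALID_BITS). A hedged sketch of that
translation, with simplified SK_* flag definitions standing in for Xen's
headers:

  #include <stdint.h>
  #include <stdio.h>

  #define SK_PAGE_PWT (1u << 3)
  #define SK_PAGE_PCD (1u << 4)
  #define SK_PAGE_PAT (1u << 7)   /* PAT position in a 4k l1 entry */

  /* Compute the cache-control flags for the synthesized 4k l1 entry
   * from a 1GiB leaf's flags and gfn.  Bit 0 of the gfn is the leaf's
   * PSE_PAT bit (entry bit 12 shifted down by PAGE_SHIFT). */
  static uint32_t fake_l1e_cacheattr(uint32_t l3e_flags, uint64_t l3e_gfn)
  {
      uint32_t flags = l3e_flags & (SK_PAGE_PWT | SK_PAGE_PCD);

      if ( l3e_gfn & 1 )
          flags |= SK_PAGE_PAT;

      return flags;
  }

  int main(void)
  {
      /* PWT set in the leaf, PSE_PAT set via gfn bit 0: expect 0x88. */
      printf("%#x\n", fake_l1e_cacheattr(SK_PAGE_PWT, 0x40001ULL));
      return 0;
  }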