# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1225882641 0
# Node ID 5fd51e1e9c798f18a06a43c4cb83df93ac0698dd
# Parent 1e437b5b418a1c47c96f8c65442bc63ab35f671c
x86: PV support for hugepages
Hugepage support must be enabled via the hypervisor command-line
option "allowhugepage". The tools do not currently support saving,
restoring, or migrating guests that use hugepages.
Signed-off-by: Dave McCracken <dave.mccracken@xxxxxxxxxx>
---
xen/arch/x86/mm.c | 89 ++++++++++++++++++++++++++++++++------
xen/arch/x86/traps.c | 10 ++--
xen/include/asm-x86/mm.h | 1
xen/include/asm-x86/x86_32/page.h | 2
xen/include/asm-x86/x86_64/page.h | 2
5 files changed, 86 insertions(+), 18 deletions(-)
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/arch/x86/mm.c
--- a/xen/arch/x86/mm.c Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/arch/x86/mm.c Wed Nov 05 10:57:21 2008 +0000
@@ -160,6 +160,9 @@ unsigned long total_pages;
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
+int opt_allow_hugepage;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
#define l1_disallow_mask(d) \
((d != dom_io) && \
(rangeset_is_empty((d)->iomem_caps) && \
@@ -584,6 +587,28 @@ static int get_page_and_type_from_pagenr
put_page(page);
return rc;
+}
+
+static int get_data_page(
+ struct page_info *page, struct domain *d, int writeable)
+{
+ int rc;
+
+ if ( writeable )
+ rc = get_page_and_type(page, d, PGT_writable_page);
+ else
+ rc = get_page(page, d);
+
+ return rc;
+}
+
+static void put_data_page(
+ struct page_info *page, int writeable)
+{
+ if ( writeable )
+ put_page_and_type(page);
+ else
+ put_page(page);
}
/*
@@ -700,10 +725,9 @@ get_page_from_l1e(
* contribute to writeable mapping refcounts. (This allows the
* qemu-dm helper process in dom0 to map the domain's memory without
* messing up the count of "real" writable mappings.) */
- okay = (((l1f & _PAGE_RW) &&
- !(unlikely(paging_mode_external(d) && (d != curr->domain))))
- ? get_page_and_type(page, d, PGT_writable_page)
- : get_page(page, d));
+ okay = get_data_page(
+ page, d,
+ (l1f & _PAGE_RW) && !(paging_mode_external(d) && (d != curr->domain)));
if ( !okay )
{
MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
@@ -751,6 +775,7 @@ get_page_from_l2e(
get_page_from_l2e(
l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
{
+ unsigned long mfn = l2e_get_pfn(l2e);
int rc;
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
@@ -762,10 +787,37 @@ get_page_from_l2e(
return -EINVAL;
}
- rc = get_page_and_type_from_pagenr(
- l2e_get_pfn(l2e), PGT_l1_page_table, d, 0, 0);
- if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
- rc = 0;
+ if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0);
+ if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+ rc = 0;
+ }
+ else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) )
+ {
+ rc = -EINVAL;
+ }
+ else
+ {
+ unsigned long m = mfn;
+ int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
+
+ do {
+ rc = get_data_page(mfn_to_page(m), d, writeable);
+ if ( unlikely(!rc) )
+ {
+ while ( m-- > mfn )
+ put_data_page(mfn_to_page(m), writeable);
+ return -EINVAL;
+ }
+ } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+
+#ifdef __x86_64__
+ map_pages_to_xen(
+ (unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+ PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+ }
return rc;
}
@@ -954,13 +1006,24 @@ void put_page_from_l1e(l1_pgentry_t l1e,
*/
static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
{
- if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
- (l2e_get_pfn(l2e) != pfn) )
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
+ return 1;
+
+ if ( l2e_get_flags(l2e) & _PAGE_PSE )
+ {
+ unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
+ int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+ ASSERT(opt_allow_hugepage && !(mfn & (L1_PAGETABLE_ENTRIES-1)));
+ do {
+ put_data_page(mfn_to_page(m), writeable);
+ } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ }
+ else
{
put_page_and_type(l2e_get_page(l2e));
- return 0;
- }
- return 1;
+ }
+
+ return 0;
}
static int __put_page_type(struct page_info *, int preemptible);
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/arch/x86/traps.c
--- a/xen/arch/x86/traps.c Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/arch/x86/traps.c Wed Nov 05 10:57:21 2008 +0000
@@ -723,7 +723,8 @@ static void pv_cpuid(struct cpu_user_reg
{
/* Modify Feature Information. */
__clear_bit(X86_FEATURE_VME, &d);
- __clear_bit(X86_FEATURE_PSE, &d);
+ if ( !opt_allow_hugepage )
+ __clear_bit(X86_FEATURE_PSE, &d);
__clear_bit(X86_FEATURE_PGE, &d);
__clear_bit(X86_FEATURE_MCE, &d);
__clear_bit(X86_FEATURE_MCA, &d);
@@ -2003,9 +2004,12 @@ static int emulate_privileged_op(struct
case 4: /* Read CR4 */
/*
* Guests can read CR4 to see what features Xen has enabled. We
- * therefore lie about PGE & PSE as they are unavailable to guests.
+ * therefore lie about PGE as it is unavailable to guests.
+ * Also disallow PSE if hugepages are not enabled.
*/
- *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
+ *reg = read_cr4() & ~X86_CR4_PGE;
+ if ( !opt_allow_hugepage )
+ *reg &= ~X86_CR4_PSE;
break;
default:
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/mm.h Wed Nov 05 10:57:21 2008 +0000
@@ -263,6 +263,7 @@ pae_copy_root(struct vcpu *v, l3_pgentry
int check_descriptor(const struct domain *, struct desc_struct *d);
+extern int opt_allow_hugepage;
/******************************************************************************
* With shadow pagetables, the different kinds of address start
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/include/asm-x86/x86_32/page.h
--- a/xen/include/asm-x86/x86_32/page.h Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/x86_32/page.h Wed Nov 05 10:57:21 2008 +0000
@@ -112,7 +112,7 @@ extern unsigned int PAGE_HYPERVISOR_NOCA
#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
#define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
#endif /* __X86_32_PAGE_H__ */
diff -r 1e437b5b418a -r 5fd51e1e9c79 xen/include/asm-x86/x86_64/page.h
--- a/xen/include/asm-x86/x86_64/page.h Wed Nov 05 10:26:19 2008 +0000
+++ b/xen/include/asm-x86/x86_64/page.h Wed Nov 05 10:57:21 2008 +0000
@@ -115,7 +115,7 @@ typedef l4_pgentry_t root_pgentry_t;
#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
#define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
#define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|