Hi,

This patch adds initial support for PAE paging to Xen. It's
against cset 1.1442 (as of this morning).

This patch does:
* boot Xen itself with PAE paging enabled.
* add PAE support to the dom0 domain builder.

Not yet done (some details will come in another mail):
* fix the hypercall interfaces to handle 64-bit page table entries.
* actually use memory above 4GB (depends on the hypercall interface
changes).
* boot anything other than domain 0.
* shadow mode support.

I'll submit xenlinux patches (hopefully) later this week or next
week. The current state can be found at http://dl.bytesex.org/patches/
Some notes on the design:
* There are two new config options: CONFIG_X86_PAE (boolean,
the same name Linux uses, to simplify things) and
CONFIG_PAGING_LEVELS (int, possible values are 2, 3 and 4). I've
used #if CONFIG_PAGING_LEVELS for stuff which simply depends
on the number of paging levels in the code common to
x86-32/64, and CONFIG_X86_PAE for special PAE quirks or
i386-only stuff. I've tried to avoid ifdefs where possible
though; often I rearranged code to make it work in both the
PAE and non-PAE case instead. A small sketch of the intended
split follows this list.
* idle_pg_table: the 3rd level is statically initialized, the 2nd
level is contiguous in physical and virtual memory, so it can
be addressed linearly (the dom0 builder uses the same trick to
simplify things a bit, btw). There are two new symbols:
idle_pg_table_l3 and idle_pg_table_l2 for the two tables.
idle_pg_table is aliased to the top-level page table, i.e.
idle_pg_table_l3 in PAE mode and idle_pg_table_l2 in non-PAE
mode. The idle l3 table is never touched after boot; the l2
table is accessed via idle_pg_table_l2 and addressed linearly
in both PAE and non-PAE mode (see the second sketch below).
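
To illustrate the intended split of the two config options, a
minimal sketch (PTE_BYTES and TOPLEVEL_ENTRIES are names made up
for this mail, they are not part of the patch):

    /* Code which only cares about the number of paging levels
     * goes behind CONFIG_PAGING_LEVELS; it reads the same for
     * PAE (3 levels) and x86-64 (4 levels), both of which use
     * 64-bit page table entries. */
    #if CONFIG_PAGING_LEVELS >= 3
    # define PTE_BYTES 8
    #else
    # define PTE_BYTES 4
    #endif

    /* i386-only PAE quirks go behind CONFIG_X86_PAE, e.g. the
     * top-level table having just four entries in PAE mode. */
    #ifdef CONFIG_X86_PAE
    # define TOPLEVEL_ENTRIES 4
    #endif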
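
The linear addressing of the l2 table boils down to indexing
idle_pg_table_l2 with a single shift, identical in both modes; a
sketch (idle_l2_entry() is a made-up helper for this mail,
l2_linear_offset() is the macro the patch adds to x86_32/page.h):

    /* idle_pg_table_l2 is one 4kB page in non-PAE mode and four
     * contiguous pages in PAE mode, so one shift finds the slot
     * for a virtual address in both cases. */
    static inline l2_pgentry_t *idle_l2_entry(unsigned long va)
    {
        return &idle_pg_table_l2[l2_linear_offset(va)];
    }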
Please apply. Comments & questions are welcome.
Gerd
PS: Some bits in this patch are from Scott Parish <srparish@xxxxxxxxxx>
Signed-off-by: Gerd Knorr <kraxel@xxxxxxxxxxx>
Index: xen/include/asm-x86/config.h
===================================================================
--- xen.orig/include/asm-x86/config.h 2005-05-13 12:37:10.000000000 +0200
+++ xen/include/asm-x86/config.h 2005-05-13 12:58:42.000000000 +0200
@@ -9,6 +9,19 @@
#define CONFIG_VMX 1
+#if defined(__i386__)
+// # define CONFIG_X86_PAE 1 /* yes */
+# undef CONFIG_X86_PAE /* no */
+#endif
+
+#if defined(__x86_64)
+# define CONFIG_PAGING_LEVELS 4
+#elif defined(CONFIG_X86_PAE)
+# define CONFIG_PAGING_LEVELS 3
+#else
+# define CONFIG_PAGING_LEVELS 2
+#endif
+
#define CONFIG_X86 1
#define CONFIG_SHADOW 1
@@ -194,7 +207,7 @@ extern unsigned long _end; /* standard E
* Per-domain mappings ( 4MB)
* Shadow linear pagetable ( 4MB) ( 8MB)
* Guest linear pagetable ( 4MB) ( 8MB)
- * Machine-to-physical translation table [writable] ( 4MB)
+ * Machine-to-physical translation table [writable] ( 4MB) (16MB)
* Frame-info table (24MB) (96MB)
* * Start of guest inaccessible area
* Machine-to-physical translation table [read-only] ( 4MB)
@@ -208,8 +221,8 @@ extern unsigned long _end; /* standard E
#ifdef CONFIG_X86_PAE
# define LINEARPT_MBYTES 8
-# define MACHPHYS_MBYTES 4 /* KAF: This needs to be bigger */
-# define FRAMETABLE_MBYTES 96 /* 16 GB mem limit (total) */
+# define MACHPHYS_MBYTES 16 /* 1 MB needed per 1 GB memory */
+# define FRAMETABLE_MBYTES (MACHPHYS_MBYTES * 6)
#else
# define LINEARPT_MBYTES 4
# define MACHPHYS_MBYTES 4
@@ -242,21 +255,21 @@ extern unsigned long _end; /* standard E
#define GUEST_SEGMENT_MAX_ADDR RO_MPT_VIRT_END
#ifdef CONFIG_X86_PAE
-/* Hypervisor owns top 144MB of virtual address space. */
-# define __HYPERVISOR_VIRT_START 0xF7000000
-# define HYPERVISOR_VIRT_START (0xF7000000UL)
+/* Hypervisor owns top 168MB of virtual address space. */
+# define __HYPERVISOR_VIRT_START 0xF5800000
+# define HYPERVISOR_VIRT_START (0xF5800000UL)
#else
/* Hypervisor owns top 64MB of virtual address space. */
# define __HYPERVISOR_VIRT_START 0xFC000000
# define HYPERVISOR_VIRT_START (0xFC000000UL)
#endif
-#define ROOT_PAGETABLE_FIRST_XEN_SLOT \
+#define L2_PAGETABLE_FIRST_XEN_SLOT \
(HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
-#define ROOT_PAGETABLE_LAST_XEN_SLOT \
+#define L2_PAGETABLE_LAST_XEN_SLOT \
(~0UL >> L2_PAGETABLE_SHIFT)
-#define ROOT_PAGETABLE_XEN_SLOTS \
- (ROOT_PAGETABLE_LAST_XEN_SLOT - ROOT_PAGETABLE_FIRST_XEN_SLOT + 1)
+#define L2_PAGETABLE_XEN_SLOTS \
+ (L2_PAGETABLE_LAST_XEN_SLOT - L2_PAGETABLE_FIRST_XEN_SLOT + 1)
#define PGT_base_page_table PGT_l2_page_table
Index: xen/arch/x86/setup.c
===================================================================
--- xen.orig/arch/x86/setup.c 2005-05-13 12:37:10.000000000 +0200
+++ xen/arch/x86/setup.c 2005-05-13 12:37:42.000000000 +0200
@@ -70,7 +70,7 @@ extern int do_timer_lists_from_pit;
struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1 };
-#if defined(__x86_64__)
+#if CONFIG_PAGING_LEVELS > 2
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE;
#else
unsigned long mmu_cr4_features = X86_CR4_PSE | X86_CR4_PGE;
Index: xen/arch/x86/domain_build.c
===================================================================
--- xen.orig/arch/x86/domain_build.c 2005-05-13 12:37:10.000000000 +0200
+++ xen/arch/x86/domain_build.c 2005-05-13 12:37:42.000000000 +0200
@@ -44,15 +44,15 @@ boolean_param("dom0_translate", opt_dom0
#if defined(__i386__)
/* No ring-3 access in initial leaf page tables. */
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
+#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#define L3_PROT (_PAGE_PRESENT)
#elif defined(__x86_64__)
/* Allow ring-3 access in long mode as guest cannot use ring 1. */
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
-#endif
-/* Don't change these: Linux expects just these bits to be set. */
-/* (And that includes the bogus _PAGE_DIRTY!) */
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#endif
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
@@ -91,7 +91,11 @@ int construct_dom0(struct domain *d,
#elif defined(__x86_64__)
char *image_start = __va(_image_start);
char *initrd_start = __va(_initrd_start);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
l4_pgentry_t *l4tab = NULL, *l4start = NULL;
+#endif
+#if CONFIG_PAGING_LEVELS >= 3
l3_pgentry_t *l3tab = NULL, *l3start = NULL;
#endif
l2_pgentry_t *l2tab = NULL, *l2start = NULL;
@@ -172,10 +176,15 @@ int construct_dom0(struct domain *d,
v_end = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
if ( (v_end - vstack_end) < (512UL << 10) )
v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
-#if defined(__i386__)
+#if defined(__i386__) && !defined(CONFIG_X86_PAE)
if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
break;
+#elif defined(__i386__) && defined(CONFIG_X86_PAE)
+ /* 5 pages: 1x 3rd + 4x 2nd level */
+ if ( (((v_end - dsi.v_start + ((1UL<<L2_PAGETABLE_SHIFT)-1)) >>
+ L2_PAGETABLE_SHIFT) + 5) <= nr_pt_pages )
+ break;
#elif defined(__x86_64__)
#define NR(_l,_h,_s) \
(((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
@@ -252,6 +261,24 @@ int construct_dom0(struct domain *d,
}
/* WARNING: The new domain must have its 'processor' field filled in! */
+#if CONFIG_PAGING_LEVELS == 3
+ l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
+ l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
+ memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
+ for (i = 0; i < 4; i++) {
+ l3tab[i] = l3e_create_phys((u32)l2tab + i*PAGE_SIZE, L3_PROT);
+ l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
+ l2e_create_phys((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
+ }
+ unsigned long v;
+ for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
+ v += (1 << L2_PAGETABLE_SHIFT)) {
+ l2tab[v >> L2_PAGETABLE_SHIFT] =
l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + (v-PERDOMAIN_VIRT_START),
+ __PAGE_HYPERVISOR);
+ }
+ ed->arch.guest_table = mk_pagetable((unsigned long)l3start);
+#else
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
memcpy(l2tab, &idle_pg_table[0], PAGE_SIZE);
l2tab[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
@@ -259,8 +286,9 @@ int construct_dom0(struct domain *d,
l2tab[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
l2e_create_phys(__pa(d->arch.mm_perdomain_pt), __PAGE_HYPERVISOR);
ed->arch.guest_table = mk_pagetable((unsigned long)l2start);
+#endif
- l2tab += l2_table_offset(dsi.v_start);
+ l2tab += l2_linear_offset(dsi.v_start);
mfn = alloc_start >> PAGE_SHIFT;
for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
{
@@ -285,8 +313,8 @@ int construct_dom0(struct domain *d,
}
/* Pages that are part of page tables must be read only. */
- l2tab = l2start + l2_table_offset(vpt_start);
- l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*l2tab);
+ l2tab = l2start + l2_linear_offset(vpt_start);
+ l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*l2tab);
l1tab += l1_table_offset(vpt_start);
for ( count = 0; count < nr_pt_pages; count++ )
{
@@ -297,6 +325,34 @@ int construct_dom0(struct domain *d,
if ( !get_page_type(page, PGT_writable_page) )
BUG();
+#if CONFIG_PAGING_LEVELS == 3
+ switch (count) {
+ case 0:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l3_page_table;
+ get_page(page, d); /* an extra ref because of readable mapping */
+
+ /* Get another ref to L3 page so that it can be pinned. */
+ if ( !get_page_and_type(page, d, PGT_l3_page_table) )
+ BUG();
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ break;
+ case 1 ... 4:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l2_page_table;
+ page->u.inuse.type_info |=
+ (count-1) << PGT_va_shift;
+ get_page(page, d); /* an extra ref because of readable mapping */
+ break;
+ default:
+ page->u.inuse.type_info &= ~PGT_type_mask;
+ page->u.inuse.type_info |= PGT_l1_page_table;
+ page->u.inuse.type_info |=
+ ((dsi.v_start>>L2_PAGETABLE_SHIFT)+(count-5))<<PGT_va_shift;
+ get_page(page, d); /* an extra ref because of readable mapping */
+ break;
+ }
+#else
if ( count == 0 )
{
page->u.inuse.type_info &= ~PGT_type_mask;
@@ -329,8 +385,9 @@ int construct_dom0(struct domain *d,
*/
get_page(page, d); /* an extra ref because of readable mapping */
}
+#endif
if ( !((unsigned long)++l1tab & (PAGE_SIZE - 1)) )
- l1start = l1tab = (l1_pgentry_t *)l2e_get_phys(*++l2tab);
+ l1start = l1tab = (l1_pgentry_t *)(u32)l2e_get_phys(*++l2tab);
}
#elif defined(__x86_64__)
@@ -541,10 +598,8 @@ int construct_dom0(struct domain *d,
#if defined(__i386__)
/* Destroy low mappings - they were only for our convenience. */
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- if ( l2e_get_flags(l2start[i]) & _PAGE_PSE )
- l2start[i] = l2e_empty();
- zap_low_mappings(); /* Do the same for the idle page tables. */
+ zap_low_mappings(l2start);
+ zap_low_mappings(idle_pg_table_l2);
#endif
/* DOM0 gets access to everything. */
@@ -561,6 +616,12 @@ int construct_dom0(struct domain *d,
: SHM_enable));
if ( opt_dom0_translate )
{
+#if defined(__i386__) && defined(CONFIG_X86_PAE)
+ printk("FIXME: PAE code needed here: %s:%d (%s)\n",
+ __FILE__, __LINE__, __FUNCTION__);
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
+#else
/* Hmm, what does this?
Looks like isn't portable across 32/64 bit and pae/non-pae ...
-- kraxel */
@@ -583,6 +644,7 @@ int construct_dom0(struct domain *d,
pagetable_get_pfn(ed->arch.guest_table));
idle_pg_table[1] = root_empty();
local_flush_tlb();
+#endif
}
update_pagetables(ed); /* XXX SMP */
Index: xen/include/asm-x86/page.h
===================================================================
--- xen.orig/include/asm-x86/page.h 2005-05-13 12:37:10.000000000 +0200
+++ xen/include/asm-x86/page.h 2005-05-13 12:47:49.000000000 +0200
@@ -9,10 +9,14 @@
#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
+#ifndef __ASSEMBLY__
+# include <asm/types.h>
+#endif
+
#if defined(__i386__)
-#include <asm/x86_32/page.h>
+# include <asm/x86_32/page.h>
#elif defined(__x86_64__)
-#include <asm/x86_64/page.h>
+# include <asm/x86_64/page.h>
#endif
/* Convert a pointer to a page-table entry into pagetable slot index. */
@@ -21,9 +25,18 @@
/* Page-table type. */
#ifndef __ASSEMBLY__
-typedef struct { unsigned long pt_lo; } pagetable_t;
-#define pagetable_val(_x) ((_x).pt_lo)
-#define pagetable_get_pfn(_x) ((_x).pt_lo >> PAGE_SHIFT)
+#if CONFIG_PAGING_LEVELS == 2
+/* x86_32 default */
+typedef struct { u32 pt; } pagetable_t;
+#elif CONFIG_PAGING_LEVELS == 3
+/* x86_32 PAE */
+typedef struct { u32 pt; } pagetable_t; /* FIXME */
+#elif CONFIG_PAGING_LEVELS == 4
+/* x86_64 */
+typedef struct { u64 pt; } pagetable_t;
+#endif
+#define pagetable_val(_x) ((_x).pt)
+#define pagetable_get_pfn(_x) ((_x).pt >> PAGE_SHIFT)
#define mk_pagetable(_x) ( (pagetable_t) { (_x) } )
#endif
@@ -39,6 +52,7 @@ typedef struct { unsigned long pt_lo; }
#define pfn_valid(_pfn) ((_pfn) < max_page)
/* High table entries are reserved by the hypervisor. */
+/* FIXME: this breaks with PAE -- kraxel */
#define DOMAIN_ENTRIES_PER_L2_PAGETABLE \
(HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT)
#define HYPERVISOR_ENTRIES_PER_L2_PAGETABLE \
@@ -73,7 +87,14 @@ typedef struct { unsigned long pt_lo; }
#define va_to_l1mfn(_ed, _va) \
(l2e_get_pfn(linear_l2_table(_ed)[_va>>L2_PAGETABLE_SHIFT]))
+#if CONFIG_PAGING_LEVELS == 3
extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
+extern l3_pgentry_t idle_pg_table_l3[ROOT_PAGETABLE_ENTRIES];
+extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES*L2_PAGETABLE_ENTRIES];
+#else
+extern root_pgentry_t idle_pg_table[ROOT_PAGETABLE_ENTRIES];
+extern l2_pgentry_t idle_pg_table_l2[ROOT_PAGETABLE_ENTRIES];
+#endif
extern void paging_init(void);
@@ -131,6 +152,8 @@ static __inline__ int get_order(unsigned
return order;
}
+extern void printk_page_flags(u32 flags);
+
/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
extern int
map_pages(
Index: xen/include/asm-x86/x86_32/page-2l.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen/include/asm-x86/x86_32/page-2l.h 2005-05-13 12:37:42.000000000 +0200
@@ -0,0 +1,109 @@
+#ifndef __X86_32_PAGE_2L_H__
+#define __X86_32_PAGE_2L_H__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 22
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define ROOT_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT
+
+#define PAGETABLE_ORDER 10
+#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define ROOT_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES
+
+#define PADDR_BITS 32
+#define PADDR_MASK (~0UL)
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+typedef struct { u32 l1_lo; } l1_pgentry_t;
+typedef struct { u32 l2_lo; } l2_pgentry_t;
+typedef l2_pgentry_t root_pgentry_t;
+
+/* read access (deprecated) */
+#define l1e_get_value(_x) ((u32)(_x).l1_lo)
+#define l2e_get_value(_x) ((u32)(_x).l2_lo)
+
+/* read access */
+#define l1e_get_pfn(_x) ((u32)((_x).l1_lo >> PAGE_SHIFT))
+#define l1e_get_phys(_x) ((u32)((_x).l1_lo & PAGE_MASK))
+#define l1e_get_flags(_x) ((u32)((_x).l1_lo & ~PAGE_MASK))
+
+#define l2e_get_pfn(_x) ((u32)((_x).l2_lo >> PAGE_SHIFT))
+#define l2e_get_phys(_x) ((u32)((_x).l2_lo & PAGE_MASK))
+#define l2e_get_flags(_x) ((u32)((_x).l2_lo & ~PAGE_MASK))
+
+/* write access */
+static inline l1_pgentry_t l1e_empty(void)
+{
+ l1_pgentry_t e = { .l1_lo = 0 };
+ return e;
+}
+static inline l1_pgentry_t l1e_create_pfn(u32 pfn, u32 flags)
+{
+ l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags };
+ return e;
+}
+static inline l1_pgentry_t l1e_create_phys(u32 addr, u32 flags)
+{
+ l1_pgentry_t e = { .l1_lo = (addr & PAGE_MASK) | flags };
+ return e;
+}
+static inline void l1e_add_flags(l1_pgentry_t *e, u32 flags)
+{
+ e->l1_lo |= flags;
+}
+static inline void l1e_remove_flags(l1_pgentry_t *e, u32 flags)
+{
+ e->l1_lo &= ~flags;
+}
+
+static inline l2_pgentry_t l2e_empty(void)
+{
+ l2_pgentry_t e = { .l2_lo = 0 };
+ return e;
+}
+static inline l2_pgentry_t l2e_create_pfn(u32 pfn, u32 flags)
+{
+ l2_pgentry_t e = { .l2_lo = (pfn << PAGE_SHIFT) | flags };
+ return e;
+}
+static inline l2_pgentry_t l2e_create_phys(u32 addr, u32 flags)
+{
+ l2_pgentry_t e = { .l2_lo = (addr & PAGE_MASK) | flags };
+ return e;
+}
+static inline void l2e_add_flags(l2_pgentry_t *e, u32 flags)
+{
+ e->l2_lo |= flags;
+}
+static inline void l2e_remove_flags(l2_pgentry_t *e, u32 flags)
+{
+ e->l2_lo &= ~flags;
+}
+
+/* check entries */
+static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags)
+{
+ return ((e1->l1_lo ^ e2->l1_lo) & (PAGE_MASK | flags)) != 0;
+}
+static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags)
+{
+ return ((e1->l2_lo ^ e2->l2_lo) & (PAGE_MASK | flags)) != 0;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/* root table */
+#define root_get_pfn l2e_get_pfn
+#define root_get_flags l2e_get_flags
+#define root_get_value l2e_get_value
+#define root_empty l2e_empty
+#define root_create_phys l2e_create_phys
+#define PGT_root_page_table PGT_l2_page_table
+
+/* misc */
+#define is_guest_l1_slot(_s) (1)
+#define is_guest_l2_slot(_t,_s) ((_s) < L2_PAGETABLE_FIRST_XEN_SLOT)
+
+#endif /* __X86_32_PAGE_2L_H__ */
Index: xen/include/asm-x86/x86_32/page-3l.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ xen/include/asm-x86/x86_32/page-3l.h 2005-05-13 14:02:31.000000000 +0200
@@ -0,0 +1,204 @@
+#ifndef __X86_32_PAGE_3L_H__
+#define __X86_32_PAGE_3L_H__
+
+#define L1_PAGETABLE_SHIFT 12
+#define L2_PAGETABLE_SHIFT 21
+#define L3_PAGETABLE_SHIFT 30
+#define PAGE_SHIFT L1_PAGETABLE_SHIFT
+#define ROOT_PAGETABLE_SHIFT L3_PAGETABLE_SHIFT
+
+#define PAGETABLE_ORDER 9
+#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
+#define L3_PAGETABLE_ENTRIES 4
+#define ROOT_PAGETABLE_ENTRIES L3_PAGETABLE_ENTRIES
+
+#define PADDR_BITS 32 /* FIXME */
+#define PADDR_MASK (~0UL) /* FIXME */
+
+#ifndef __ASSEMBLY__
+#include <asm/types.h>
+typedef struct { u32 l1_lo; u32 l1_hi; } l1_pgentry_t;
+typedef struct { u32 l2_lo; u32 l2_hi; } l2_pgentry_t;
+typedef struct { u32 l3_lo; u32 l3_hi; } l3_pgentry_t;
+typedef l3_pgentry_t root_pgentry_t;
+
+/* read access (deprecated) */
+static inline u64 l1e_get_value(l1_pgentry_t x)
+{
+ return ((u64)x.l1_lo | (u64)x.l1_hi << 32);
+}
+static inline u64 l2e_get_value(l2_pgentry_t x)
+{
+ return ((u64)x.l2_lo | (u64)x.l2_hi << 32);
+}
+static inline u64 l3e_get_value(l3_pgentry_t x)
+{
+ return ((u64)x.l3_lo | (u64)x.l3_hi << 32);
+}
+
+
+/* read access */
+static inline unsigned long l1e_get_pfn(l1_pgentry_t x)
+{
+ return (((x.l1_hi & 0x0fULL) << (32-PAGE_SHIFT)) |
+ (x.l1_lo >> PAGE_SHIFT));
+}
+static inline u64 l1e_get_phys(l1_pgentry_t x)
+{
+ return ((((u64)x.l1_hi & 0x0fULL) << 32) |
+ ((u64)x.l1_lo & PAGE_MASK));
+}
+static inline unsigned long l1e_get_flags(l1_pgentry_t x)
+{
+ return (x.l1_lo & ~PAGE_MASK);
+}
+
+static inline unsigned long l2e_get_pfn(l2_pgentry_t x)
+{
+ return (((x.l2_hi & 0x0fULL) << (32-PAGE_SHIFT)) |
+ (x.l2_lo >> PAGE_SHIFT));
+}
+static inline u64 l2e_get_phys(l2_pgentry_t x)
+{
+ return ((((u64)x.l2_hi & 0x0fULL) << 32) |
+ ((u64)x.l2_lo & PAGE_MASK));
+}
+static inline unsigned long l2e_get_flags(l2_pgentry_t x)
+{
+ return (x.l2_lo & ~PAGE_MASK);
+}
+
+static inline unsigned long l3e_get_pfn(l3_pgentry_t x)
+{
+ return (((x.l3_hi & 0x0fULL) << (32-PAGE_SHIFT)) |
+ (x.l3_lo >> PAGE_SHIFT));
+}
+static inline u64 l3e_get_phys(l3_pgentry_t x)
+{
+ return ((((u64)x.l3_hi & 0x0fULL) << 32) |
+ ((u64)x.l3_lo & PAGE_MASK));
+}
+static inline unsigned long l3e_get_flags(l3_pgentry_t x)
+{
+ return (x.l3_lo & ~PAGE_MASK);
+}
+
+
+/* write access */
+static inline l1_pgentry_t l1e_empty(void)
+{
+ l1_pgentry_t e = { .l1_hi = 0,
+ .l1_lo = 0 };
+ return e;
+}
+static inline l1_pgentry_t l1e_create_pfn(u32 pfn, u32 flags)
+{
+ l1_pgentry_t e = { .l1_hi = (pfn >> (32-PAGE_SHIFT)) & 0x0f,
+ .l1_lo = (pfn << PAGE_SHIFT) | flags };
+ return e;
+}
+static inline l1_pgentry_t l1e_create_phys(u64 addr, u32 flags)
+{
+ l1_pgentry_t e = { .l1_hi = (u32)((addr >> 32) & 0x0f),
+ .l1_lo = (u32)((addr & PAGE_MASK)) | flags };
+ return e;
+}
+static inline void l1e_add_flags(l1_pgentry_t *e, u32 flags)
+{
+ e->l1_lo |= flags;
+}
+static inline void l1e_remove_flags(l1_pgentry_t *e, u32 flags)
+{
+ e->l1_lo &= ~flags;
+}
+
+static inline l2_pgentry_t l2e_empty(void)
+{
+ l2_pgentry_t e = { .l2_hi = 0,
+ .l2_lo = 0 };
+ return e;
+}
+static inline l2_pgentry_t l2e_create_pfn(u32 pfn, u32 flags)
+{
+ l2_pgentry_t e = { .l2_hi = (pfn >> (32-PAGE_SHIFT)) & 0x0f,
+ .l2_lo = (pfn << PAGE_SHIFT) | flags };
+ return e;
+}
+static inline l2_pgentry_t l2e_create_phys(u64 addr, u32 flags)
+{
+ l2_pgentry_t e = { .l2_hi = (u32)((addr >> 32) & 0x0f),
+ .l2_lo = (u32)((addr & PAGE_MASK)) | flags };
+ return e;
+}
+static inline void l2e_add_flags(l2_pgentry_t *e, u32 flags)
+{
+ e->l2_lo |= flags;
+}
+static inline void l2e_remove_flags(l2_pgentry_t *e, u32 flags)
+{
+ e->l2_lo &= ~flags;
+}
+
+static inline l3_pgentry_t l3e_empty(void)
+{
+ l3_pgentry_t e = { .l3_hi = 0,
+ .l3_lo = 0 };
+ return e;
+}
+static inline l3_pgentry_t l3e_create_pfn(u32 pfn, u32 flags)
+{
+ l3_pgentry_t e = { .l3_hi = (pfn >> (32-PAGE_SHIFT)) & 0x0f,
+ .l3_lo = (pfn << PAGE_SHIFT) | flags };
+ return e;
+}
+static inline l3_pgentry_t l3e_create_phys(u64 addr, u32 flags)
+{
+ l3_pgentry_t e = { .l3_hi = (u32)((addr >> 32) & 0x0f),
+ .l3_lo = (u32)((addr & PAGE_MASK)) | flags };
+ return e;
+}
+static inline void l3e_add_flags(l3_pgentry_t *e, u32 flags)
+{
+ e->l3_lo |= flags;
+}
+static inline void l3e_remove_flags(l3_pgentry_t *e, u32 flags)
+{
+ e->l3_lo &= ~flags;
+}
+
+/* check entries */
+static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags)
+{
+ return ( ((e1->l1_lo ^ e2->l1_lo) & (PAGE_MASK | flags)) != 0 ||
+ ((e1->l1_hi ^ e2->l1_hi) & 0x0f ) != 0 );
+}
+static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags)
+{
+ return ( ((e1->l2_lo ^ e2->l2_lo) & (PAGE_MASK | flags)) != 0 ||
+ ((e1->l2_hi ^ e2->l2_hi) & 0x0f ) != 0 );
+}
+static inline int l3e_has_changed(l3_pgentry_t *e1, l3_pgentry_t *e2, u32 flags)
+{
+ return ( ((e1->l3_lo ^ e2->l3_lo) & (PAGE_MASK | flags)) != 0 ||
+ ((e1->l3_hi ^ e2->l3_hi) & 0x0f ) != 0 );
+}
+
+#endif /* !__ASSEMBLY__ */
+
+/* root table */
+#define root_get_pfn l3e_get_pfn
+#define root_get_flags l3e_get_flags
+#define root_get_value l3e_get_value
+#define root_empty l3e_empty
+#define root_init_phys l3e_create_phys
+#define PGT_root_page_table PGT_l3_page_table
+
+/* misc */
+#define is_guest_l1_slot(_s) (1)
+#define is_guest_l2_slot(_t,_s) \
+ ((3 != (((_t) & PGT_va_mask) >> PGT_va_shift)) || \
+ ((_s) < (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))))
+#define is_guest_l3_slot(_s) (1)
+
+#endif /* __X86_32_PAGE_3L_H__ */
Index: xen/arch/x86/boot/x86_32.S
===================================================================
--- xen.orig/arch/x86/boot/x86_32.S 2005-05-13 12:37:09.000000000 +0200
+++ xen/arch/x86/boot/x86_32.S 2005-05-13 12:37:42.000000000 +0200
@@ -100,6 +100,22 @@ __start:
xor %eax,%eax
rep stosb
+#ifdef CONFIG_X86_PAE
+ /* Initialize low and high mappings of all memory with 2MB pages */
+ mov $idle_pg_table_l2-__PAGE_OFFSET,%edi
+ mov $0xe3,%eax /* PRESENT+RW+A+D+2MB */
+1: mov %eax,__PAGE_OFFSET>>18(%edi) /* high mapping */
+ stosl /* low mapping */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $DIRECTMAP_PHYS_END+0xe3,%eax
+ jne 1b
+1: stosl /* low mappings cover as much physmem as possible */
+ add $4,%edi
+ add $(1<<L2_PAGETABLE_SHIFT),%eax
+ cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
+ jne 1b
+#else
/* Initialize low and high mappings of all memory with 4MB pages */
mov $idle_pg_table-__PAGE_OFFSET,%edi
mov $0xe3,%eax /* PRESENT+RW+A+D+4MB */
@@ -112,6 +128,7 @@ __start:
add $(1<<L2_PAGETABLE_SHIFT),%eax
cmp $__HYPERVISOR_VIRT_START+0xe3,%eax
jne 1b
+#endif
/* Initialise IDT with simple error defaults. */
lea ignore_int,%edx
@@ -204,17 +221,47 @@ ENTRY(gdt_table)
.quad 0x0000000000000000 /* unused */
.quad 0x00cf9a000000ffff /* 0x0808 ring 0 4.00GB code at 0x0 */
.quad 0x00cf92000000ffff /* 0x0810 ring 0 4.00GB data at 0x0 */
+#ifdef CONFIG_X86_PAE
+ .quad 0x00cfba00000067ff
+ .quad 0x00cfb200000067ff
+ .quad 0x00cffa00000067ff
+ .quad 0x00cff200000067ff
+#else
.quad 0x00cfba000000c3ff /* 0x0819 ring 1 3.95GB code at 0x0 */
.quad 0x00cfb2000000c3ff /* 0x0821 ring 1 3.95GB data at 0x0 */
.quad 0x00cffa000000c3ff /* 0x082b ring 3 3.95GB code at 0x0 */
.quad 0x00cff2000000c3ff /* 0x0833 ring 3 3.95GB data at 0x0 */
+#endif
.quad 0x0000000000000000 /* unused */
.fill 2*NR_CPUS,8,0 /* space for TSS and LDT per CPU */
+#ifdef CONFIG_X86_PAE
+
.org 0x1000
-ENTRY(idle_pg_table) # Initial page directory is 4kB
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l3)
+ .quad 0x100000 + 0x2000 + 0x01
+ .quad 0x100000 + 0x3000 + 0x01
+ .quad 0x100000 + 0x4000 + 0x01
+ .quad 0x100000 + 0x5000 + 0x01
+
+ .org 0x2000
+ENTRY(idle_pg_table_l2)
+
+ .org 0x6000
+ENTRY(cpu0_stack)
+ .org 0x6000 + STACK_SIZE
+
+#else /* CONFIG_X86_PAE */
+
+ .org 0x1000
+ENTRY(idle_pg_table)
+ENTRY(idle_pg_table_l2) # Initial page directory is 4kB
.org 0x2000
ENTRY(cpu0_stack)
.org 0x2000 + STACK_SIZE
+
+#endif /* CONFIG_X86_PAE */
+
ENTRY(stext)
ENTRY(_stext)
Index: xen/arch/x86/mm.c
===================================================================
--- xen.orig/arch/x86/mm.c 2005-05-13 12:37:10.000000000 +0200
+++ xen/arch/x86/mm.c 2005-05-13 12:49:06.000000000 +0200
@@ -121,7 +121,8 @@
static void free_l2_table(struct pfn_info *page);
static void free_l1_table(struct pfn_info *page);
-static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long);
+static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long,
+ unsigned int type);
static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t);
/* Used to defer flushing of memory structures. */
@@ -371,7 +372,6 @@ static int get_page_and_type_from_pagenr
return 1;
}
-
/*
* We allow root tables to map each other (a.k.a. linear page tables). It
* needs some special care with reference counts and access permissions:
@@ -428,7 +428,6 @@ get_linear_pagetable(
return 1;
}
-
int
get_page_from_l1e(
l1_pgentry_t l1e, struct domain *d)
@@ -442,8 +441,9 @@ get_page_from_l1e(
if ( unlikely(l1e_get_flags(l1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 type settings %lx %lx", l1e_get_value(l1e),
- l1e_get_value(l1e) & L1_DISALLOW_MASK);
+ MEM_LOG("Bad L1 type settings %llx %llx",
+ (u64)l1e_get_value(l1e),
+ (u64)(l1e_get_value(l1e) & L1_DISALLOW_MASK));
return 0;
}
@@ -478,7 +478,7 @@ get_page_from_l1e(
static int
get_page_from_l2e(
l2_pgentry_t l2e, unsigned long pfn,
- struct domain *d, unsigned long va_idx)
+ struct domain *d, unsigned long vaddr)
{
int rc;
@@ -489,45 +489,60 @@ get_page_from_l2e(
if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L2 page type settings %lx",
- l2e_get_value(l2e) & L2_DISALLOW_MASK);
+ MEM_LOG("Bad L2 page type settings %llx",
+ (u64)(l2e_get_value(l2e) & L2_DISALLOW_MASK));
return 0;
}
+ vaddr >>= L2_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
rc = get_page_and_type_from_pagenr(
- l2e_get_pfn(l2e),
- PGT_l1_page_table | (va_idx<<PGT_va_shift), d);
+ l2e_get_pfn(l2e), PGT_l1_page_table | vaddr, d);
-#if defined(__i386__)
- return rc ? rc : get_linear_pagetable(l2e, pfn, d);
-#elif defined(__x86_64__)
- return rc;
+#if CONFIG_PAGING_LEVELS == 2
+ if (!rc)
+ rc = get_linear_pagetable(l2e, pfn, d);
#endif
+ return rc;
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d)
+ l3_pgentry_t l3e, unsigned long pfn,
+ struct domain *d, unsigned long vaddr)
{
ASSERT( !shadow_mode_refcounts(d) );
+ int rc;
+
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
return 1;
if ( unlikely((l3e_get_flags(l3e) & L3_DISALLOW_MASK)) )
{
- MEM_LOG("Bad L3 page type settings %lx",
- l3e_get_value(l3e) & L3_DISALLOW_MASK);
+ MEM_LOG("Bad L3 page type settings %llx",
+ (u64)(l3e_get_value(l3e) & L3_DISALLOW_MASK));
return 0;
}
- return get_page_and_type_from_pagenr(
- l3e_get_pfn(l3e), PGT_l2_page_table, d);
+ vaddr >>= L3_PAGETABLE_SHIFT;
+ vaddr <<= PGT_va_shift;
+ rc = get_page_and_type_from_pagenr(
+ l3e_get_pfn(l3e),
+ PGT_l2_page_table | vaddr, d);
+#if CONFIG_PAGING_LEVELS == 3
+ if (!rc)
+ rc = get_linear_pagetable(l3e, pfn, d);
+#endif
+ return rc;
}
+#endif /* 3 level */
+
+#if CONFIG_PAGING_LEVELS >= 4
static int
get_page_from_l4e(
@@ -556,7 +571,7 @@ get_page_from_l4e(
return 1;
}
-#endif /* __x86_64__ */
+#endif /* 4 level */
void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
@@ -618,7 +633,7 @@ static void put_page_from_l2e(l2_pgentry
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static void put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn)
{
@@ -627,6 +642,9 @@ static void put_page_from_l3e(l3_pgentry
put_page_and_type(&frame_table[l3e_get_pfn(l3e)]);
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
static void put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn)
{
@@ -635,7 +653,7 @@ static void put_page_from_l4e(l4_pgentry
put_page_and_type(&frame_table[l4e_get_pfn(l4e)]);
}
-#endif /* __x86_64__ */
+#endif
static int alloc_l1_table(struct pfn_info *page)
@@ -666,11 +684,58 @@ static int alloc_l1_table(struct pfn_inf
return 0;
}
+#ifdef CONFIG_X86_PAE
+static inline int fixup_pae_linear_mappings(l3_pgentry_t *pl3e)
+{
+ l2_pgentry_t *pl2e;
+ unsigned long vaddr;
+ int i,idx;
-static int alloc_l2_table(struct pfn_info *page)
+ while ((unsigned long)pl3e & ~PAGE_MASK)
+ pl3e--;
+
+ if (!(l3e_get_flags(pl3e[3]) & _PAGE_PRESENT)) {
+ printk("Installing a L3 PAE pt without L2 in slot #3 isn't going to
fly ...\n");
+ return 0;
+ }
+
+ pl2e = map_domain_mem(l3e_get_phys(pl3e[3]));
+ for (i = 0; i < 4; i++) {
+ vaddr = LINEAR_PT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
+ idx = (vaddr >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1);
+ if (l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) {
+ pl2e[idx] = l2e_create_phys(l3e_get_phys(pl3e[i]),
+ __PAGE_HYPERVISOR);
+ } else
+ pl2e[idx] = l2e_empty();
+ }
+ unmap_domain_mem(pl2e);
+
+ return 1;
+}
+
+static inline unsigned long fixup_pae_vaddr(unsigned long l2vaddr,
+ unsigned long l2type)
+{
+ unsigned long l3vaddr;
+
+ if ((l2type & PGT_va_mask) == PGT_va_unknown)
+ BUG(); /* FIXME: do something more elegant here ... */
+ l3vaddr = ((l2type & PGT_va_mask) >> PGT_va_shift)
+ << L3_PAGETABLE_SHIFT;
+ return l3vaddr + l2vaddr;
+}
+
+#else
+# define fixup_pae_linear_mappings(unused) (1)
+# define fixup_pae_vaddr(vaddr, type) (vaddr)
+#endif
+
+static int alloc_l2_table(struct pfn_info *page, unsigned int type)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_pfn(page);
+ unsigned long vaddr;
l2_pgentry_t *pl2e;
int i;
@@ -678,34 +743,55 @@ static int alloc_l2_table(struct pfn_inf
if ( (PGT_base_page_table == PGT_l2_page_table) &&
unlikely(shadow_mode_refcounts(d)) )
return 1;
-
ASSERT( !shadow_mode_refcounts(d) );
+
pl2e = map_domain_mem(pfn << PAGE_SHIFT);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l2_slot(i) &&
- unlikely(!get_page_from_l2e(pl2e[i], pfn, d, i)) )
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) {
+ vaddr = i << L2_PAGETABLE_SHIFT;
+ vaddr = fixup_pae_vaddr(vaddr,type);
+ if ( is_guest_l2_slot(type, i) &&
+ unlikely(!get_page_from_l2e(pl2e[i], pfn, d, vaddr)) )
goto fail;
+ }
-#if defined(__i386__)
+#if CONFIG_PAGING_LEVELS == 2
/* Xen private mappings. */
- memcpy(&pl2e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
- ROOT_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+ memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
+ &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
l2e_create_pfn(pfn, __PAGE_HYPERVISOR);
pl2e[l2_table_offset(PERDOMAIN_VIRT_START)] =
l2e_create_phys(__pa(page_get_owner(page)->arch.mm_perdomain_pt),
__PAGE_HYPERVISOR);
#endif
+#if CONFIG_PAGING_LEVELS == 3
+ if (3 == ((type & PGT_va_mask) >> PGT_va_shift)) {
+ unsigned long v,src,dst;
+ /* Xen private mappings. */
+ dst = L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1);
+ src = L2_PAGETABLE_FIRST_XEN_SLOT;
+ memcpy(&pl2e[dst], &idle_pg_table_l2[src],
+ L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
+ for (v = PERDOMAIN_VIRT_START; v < PERDOMAIN_VIRT_END;
+ v += (1 << L2_PAGETABLE_SHIFT)) {
+ dst = (v >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES-1);
+ pl2e[dst] =
+ l2e_create_phys(__pa(d->arch.mm_perdomain_pt) + (v-PERDOMAIN_VIRT_START),
+ __PAGE_HYPERVISOR);
+ }
+ /* see fixup_pae_linear_mappings() for linear pagetables */
+ }
+#endif
unmap_domain_mem(pl2e);
return 1;
fail:
while ( i-- > 0 )
- if ( is_guest_l2_slot(i) )
+ if ( is_guest_l2_slot(type, i) )
put_page_from_l2e(pl2e[i], pfn);
unmap_domain_mem(pl2e);
@@ -713,22 +799,29 @@ static int alloc_l2_table(struct pfn_inf
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static int alloc_l3_table(struct pfn_info *page)
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_pfn(page);
- l3_pgentry_t *pl3e = page_to_virt(page);
+ unsigned long vaddr;
+ l3_pgentry_t *pl3e;
int i;
ASSERT( !shadow_mode_refcounts(d) );
- for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
+ pl3e = map_domain_mem(pfn << PAGE_SHIFT);
+ for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) {
+ vaddr = i << L3_PAGETABLE_SHIFT;
if ( is_guest_l3_slot(i) &&
- unlikely(!get_page_from_l3e(pl3e[i], pfn, d)) )
+ unlikely(!get_page_from_l3e(pl3e[i], pfn, d, vaddr)) )
goto fail;
+ }
+ if (!fixup_pae_linear_mappings(pl3e))
+ goto fail;
+ unmap_domain_mem(pl3e);
return 1;
fail:
@@ -736,9 +829,13 @@ static int alloc_l3_table(struct pfn_inf
if ( is_guest_l3_slot(i) )
put_page_from_l3e(pl3e[i], pfn);
+ unmap_domain_mem(pl3e);
return 0;
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
static int alloc_l4_table(struct pfn_info *page)
{
@@ -807,27 +904,35 @@ static void free_l2_table(struct pfn_inf
pl2e = map_domain_mem(pfn << PAGE_SHIFT);
- for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( is_guest_l2_slot(i) )
+ for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ ) {
+ if ( is_guest_l2_slot(page->u.inuse.type_info, i) )
put_page_from_l2e(pl2e[i], pfn);
+ }
unmap_domain_mem(pl2e);
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
static void free_l3_table(struct pfn_info *page)
{
unsigned long pfn = page_to_pfn(page);
- l3_pgentry_t *pl3e = page_to_virt(page);
+ l3_pgentry_t *pl3e;
int i;
+ pl3e = map_domain_mem(pfn << PAGE_SHIFT);
+
for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
if ( is_guest_l3_slot(i) )
put_page_from_l3e(pl3e[i], pfn);
+
+ unmap_domain_mem(pl3e);
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
static void free_l4_table(struct pfn_info *page)
{
@@ -840,25 +945,29 @@ static void free_l4_table(struct pfn_inf
put_page_from_l4e(pl4e[i], pfn);
}
-#endif /* __x86_64__ */
-
+#endif
static inline int update_l1e(l1_pgentry_t *pl1e,
l1_pgentry_t ol1e,
l1_pgentry_t nl1e)
{
- /* FIXME: breaks with PAE */
+#if defined(__i386__) && defined(CONFIG_X86_PAE)
+ u64 o = l1e_get_value(ol1e);
+ u64 n = l1e_get_value(nl1e);
+#else
unsigned long o = l1e_get_value(ol1e);
unsigned long n = l1e_get_value(nl1e);
+#endif
if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) ||
unlikely(o != l1e_get_value(ol1e)) )
{
- MEM_LOG("Failed to update %lx -> %lx: saw %lx",
- l1e_get_value(ol1e), l1e_get_value(nl1e), o);
+ MEM_LOG("Failed to update %llx -> %llx: saw %llx",
+ (u64)l1e_get_value(ol1e),
+ (u64)l1e_get_value(nl1e),
+ (u64)o);
return 0;
}
-
return 1;
}
@@ -879,8 +988,8 @@ static int mod_l1_entry(l1_pgentry_t *pl
{
if ( unlikely(l1e_get_flags(nl1e) & L1_DISALLOW_MASK) )
{
- MEM_LOG("Bad L1 type settings %lx",
- l1e_get_value(nl1e) & L1_DISALLOW_MASK);
+ MEM_LOG("Bad L1 type settings %llx",
+ (u64)(l1e_get_value(nl1e) & L1_DISALLOW_MASK));
return 0;
}
@@ -913,19 +1022,23 @@ static int mod_l1_entry(l1_pgentry_t *pl
_t ## e_get_value(_o), \
_t ## e_get_value(_n)); \
if ( __o != _t ## e_get_value(_o) ) \
- MEM_LOG("Failed to update %lx -> %lx: saw %lx", \
- _t ## e_get_value(_o), _t ## e_get_value(_n), __o); \
+ MEM_LOG("Failed to update %llx -> %llx: saw %llx", \
+ (u64)(_t ## e_get_value(_o)), \
+ (u64)(_t ## e_get_value(_n)), \
+ (u64)(__o)); \
(__o == _t ## e_get_value(_o)); })
/* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
static int mod_l2_entry(l2_pgentry_t *pl2e,
l2_pgentry_t nl2e,
- unsigned long pfn)
+ unsigned long pfn,
+ unsigned int type)
{
l2_pgentry_t ol2e;
+ unsigned long vaddr;
- if ( unlikely(!is_guest_l2_slot(pgentry_ptr_to_slot(pl2e))) )
+ if ( unlikely(!is_guest_l2_slot(type,pgentry_ptr_to_slot(pl2e))) )
{
MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
return 0;
@@ -938,8 +1051,8 @@ static int mod_l2_entry(l2_pgentry_t *pl
{
if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
- MEM_LOG("Bad L2 type settings %lx",
- l2e_get_value(nl2e) & L2_DISALLOW_MASK);
+ MEM_LOG("Bad L2 type settings %llx",
+ (u64)(l2e_get_value(nl2e) & L2_DISALLOW_MASK));
return 0;
}
@@ -947,9 +1060,10 @@ static int mod_l2_entry(l2_pgentry_t *pl
if ( !l2e_has_changed(&ol2e, &nl2e, _PAGE_PRESENT))
return UPDATE_ENTRY(l2, pl2e, ol2e, nl2e);
- if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain,
- ((unsigned long)pl2e &
- ~PAGE_MASK) >> 2)) )
+ vaddr = (((unsigned long)pl2e & ~PAGE_MASK) / sizeof(l2_pgentry_t))
+ << L2_PAGETABLE_SHIFT;
+ vaddr = fixup_pae_vaddr(vaddr,type);
+ if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, vaddr)) )
return 0;
if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e)) )
@@ -969,7 +1083,7 @@ static int mod_l2_entry(l2_pgentry_t *pl
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
/* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
static int mod_l3_entry(l3_pgentry_t *pl3e,
@@ -977,6 +1091,7 @@ static int mod_l3_entry(l3_pgentry_t *pl
unsigned long pfn)
{
l3_pgentry_t ol3e;
+ unsigned long vaddr;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
{
@@ -991,8 +1106,8 @@ static int mod_l3_entry(l3_pgentry_t *pl
{
if ( unlikely(l3e_get_flags(nl3e) & L3_DISALLOW_MASK) )
{
- MEM_LOG("Bad L3 type settings %lx",
- l3e_get_value(nl3e) & L3_DISALLOW_MASK);
+ MEM_LOG("Bad L3 type settings %llx",
+ (u64)(l3e_get_value(nl3e) & L3_DISALLOW_MASK));
return 0;
}
@@ -1000,26 +1115,33 @@ static int mod_l3_entry(l3_pgentry_t *pl
if (!l3e_has_changed(&ol3e, &nl3e, _PAGE_PRESENT))
return UPDATE_ENTRY(l3, pl3e, ol3e, nl3e);
- if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain)) )
+ vaddr = (((unsigned long)pl3e & ~PAGE_MASK) / sizeof(l3_pgentry_t))
+ << L3_PAGETABLE_SHIFT;
+ if ( unlikely(!get_page_from_l3e(nl3e, pfn, current->domain, vaddr)) )
return 0;
- if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) ||
+ !fixup_pae_linear_mappings(pl3e)) )
{
put_page_from_l3e(nl3e, pfn);
return 0;
}
-
+
put_page_from_l3e(ol3e, pfn);
return 1;
}
- if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e)) )
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e) ||
+ !fixup_pae_linear_mappings(pl3e)) )
return 0;
put_page_from_l3e(ol3e, pfn);
return 1;
}
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 4
/* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
static int mod_l4_entry(l4_pgentry_t *pl4e,
@@ -1070,20 +1192,21 @@ static int mod_l4_entry(l4_pgentry_t *pl
return 1;
}
-#endif /* __x86_64__ */
-
+#endif
int alloc_page_type(struct pfn_info *page, unsigned int type)
{
- switch ( type )
+ switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
return alloc_l1_table(page);
case PGT_l2_page_table:
- return alloc_l2_table(page);
-#ifdef __x86_64__
+ return alloc_l2_table(page, type);
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
return alloc_l3_table(page);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
return alloc_l4_table(page);
#endif
@@ -1118,7 +1241,7 @@ void free_page_type(struct pfn_info *pag
}
}
- switch ( type )
+ switch (type & PGT_type_mask)
{
case PGT_l1_page_table:
free_l1_table(page);
@@ -1128,17 +1251,21 @@ void free_page_type(struct pfn_info *pag
free_l2_table(page);
break;
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
free_l3_table(page);
break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
free_l4_table(page);
break;
#endif
default:
+ printk("%s: type %x pfn %lx\n",__FUNCTION__,
+ type, page_to_pfn(page));
BUG();
}
}
@@ -1181,7 +1308,7 @@ void put_page_type(struct pfn_info *page
x & ~PGT_validated)) != x) )
goto again;
/* We cleared the 'valid bit' so we do the clean up. */
- free_page_type(page, x & PGT_type_mask);
+ free_page_type(page, x);
/* Carry on, but with the 'valid bit' now clear. */
x &= ~PGT_validated;
nx &= ~PGT_validated;
@@ -1264,6 +1391,10 @@ int get_page_type(struct pfn_info *page,
/* This table is may be mapped at multiple locations. */
nx &= ~PGT_va_mask;
nx |= PGT_va_unknown;
+#if 0 /* debug */
+ printk("%s: pfn %lx type %x -> %x (tag as unknown)\n",
+ __FUNCTION__,page_to_pfn(page),x,nx);
+#endif
}
}
if ( unlikely(!(x & PGT_validated)) )
@@ -1280,7 +1411,7 @@ int get_page_type(struct pfn_info *page,
if ( unlikely(!(nx & PGT_validated)) )
{
/* Try to validate page type; drop the new reference on failure. */
- if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) )
+ if ( unlikely(!alloc_page_type(page, type)) )
{
MEM_LOG("Error while validating pfn %lx for type %08x."
" caf=%08x taf=%08x",
@@ -1531,15 +1662,17 @@ int do_mmuext_op(
type = PGT_l2_page_table;
goto pin_page;
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
case MMUEXT_PIN_L3_TABLE:
type = PGT_l3_page_table;
goto pin_page;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case MMUEXT_PIN_L4_TABLE:
type = PGT_l4_page_table;
goto pin_page;
-#endif /* __x86_64__ */
+#endif
case MMUEXT_UNPIN_TABLE:
if ( unlikely(!(okay = get_page_from_pagenr(op.mfn, FOREIGNDOM))) )
@@ -1906,19 +2039,20 @@ int do_mmu_update(
break;
case PGT_l2_page_table:
ASSERT( !shadow_mode_refcounts(d) );
- if ( likely(get_page_type(page, PGT_l2_page_table)) )
+ if ( likely(get_page_type(
+ page, type_info & (PGT_type_mask|PGT_va_mask))) )
{
l2_pgentry_t l2e;
/* FIXME: doesn't work with PAE */
l2e = l2e_create_phys(req.val, req.val);
- okay = mod_l2_entry(va, l2e, mfn);
+ okay = mod_l2_entry((l2_pgentry_t *)va, l2e, mfn, type_info);
if ( okay && unlikely(shadow_mode_enabled(d)) )
shadow_l2_normal_pt_update(d, req.ptr, l2e,
&sh_mapcache);
put_page_type(page);
}
break;
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
case PGT_l3_page_table:
ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(page, PGT_l3_page_table)) )
@@ -1933,6 +2067,8 @@ int do_mmu_update(
put_page_type(page);
}
break;
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
case PGT_l4_page_table:
ASSERT( !shadow_mode_refcounts(d) );
if ( likely(get_page_type(page, PGT_l4_page_table)) )
@@ -1946,7 +2082,7 @@ int do_mmu_update(
put_page_type(page);
}
break;
-#endif /* __x86_64__ */
+#endif
default:
if ( likely(get_page_type(page, PGT_writable_page)) )
{
@@ -2113,9 +2249,10 @@ int update_grant_va_mapping(unsigned lon
int do_update_va_mapping(unsigned long va,
- l1_pgentry_t val,
+ unsigned long val32,
unsigned long flags)
{
+ l1_pgentry_t val = l1e_create_phys(val32,val32);
struct exec_domain *ed = current;
struct domain *d = ed->domain;
unsigned int cpu = ed->processor;
@@ -2210,7 +2347,7 @@ int do_update_va_mapping(unsigned long v
}
int do_update_va_mapping_otherdomain(unsigned long va,
- l1_pgentry_t val,
+ unsigned long val32,
unsigned long flags,
domid_t domid)
{
@@ -2228,7 +2365,7 @@ int do_update_va_mapping_otherdomain(uns
return -ESRCH;
}
- rc = do_update_va_mapping(va, val, flags);
+ rc = do_update_va_mapping(va, val32, flags);
return rc;
}
@@ -2582,8 +2719,8 @@ void ptwr_flush(struct domain *d, const
static int ptwr_emulated_update(
unsigned long addr,
- unsigned long old,
- unsigned long val,
+ physaddr_t old,
+ physaddr_t val,
unsigned int bytes,
unsigned int do_cmpxchg)
{
@@ -2601,21 +2738,22 @@ static int ptwr_emulated_update(
}
/* Turn a sub-word access into a full-word access. */
- /* FIXME: needs tweaks for PAE */
- if ( (addr & ((BITS_PER_LONG/8)-1)) != 0 )
+ if (bytes != sizeof(physaddr_t))
{
int rc;
- unsigned long full;
- unsigned int mask = addr & ((BITS_PER_LONG/8)-1);
+ physaddr_t full;
+ unsigned int offset = addr & (sizeof(physaddr_t)-1);
+
/* Align address; read full word. */
- addr &= ~((BITS_PER_LONG/8)-1);
- if ( (rc = x86_emulate_read_std(addr, &full, BITS_PER_LONG/8)) )
- return rc;
+ addr &= ~(sizeof(physaddr_t)-1);
+ if ( (rc = x86_emulate_read_std(addr, (unsigned long *)&full,
+ sizeof(physaddr_t))) )
+ return rc;
/* Mask out bits provided by caller. */
- full &= ~((1UL << (bytes*8)) - 1UL) << (mask*8);
+ full &= ~((((physaddr_t)1 << (bytes*8)) - 1) << (offset*8));
/* Shift the caller value and OR in the missing bits. */
- val &= (1UL << (bytes*8)) - 1UL;
- val <<= mask*8;
+ val &= (((physaddr_t)1 << (bytes*8)) - 1);
+ val <<= (offset)*8;
val |= full;
}
@@ -2635,8 +2773,8 @@ static int ptwr_emulated_update(
((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
(page_get_owner(page) != d) )
{
- MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%lx, %08x)\n",
- l1e_get_pfn(pte), page->u.inuse.type_info);
+ MEM_LOG("ptwr_emulate: Page is mistyped or bad pte (%llx, %08x)\n",
+ (u64)l1e_get_pfn(pte), page->u.inuse.type_info);
return X86EMUL_UNHANDLEABLE;
}
@@ -2870,7 +3008,23 @@ void ptwr_destroy(struct domain *d)
free_xenheap_page((unsigned long)d->arch.ptwr[PTWR_PT_INACTIVE].page);
}
+/* for printk debugging ;) */
+void printk_page_flags(u32 flags)
+{
+ static const char *names[12] = {
+ "present", "rw", "user", "pwt",
+ "pcd", "accessed", "dirty", "pat/pse",
+ "global", "os#1", "os#2", "os#3"
+ };
+ int i, first = 1;
+ for (i = 11; i >= 0; i--) {
+ if (!(flags & (1<<i)))
+ continue;
+ printk("%s%s", first ? "flags=" :",", names[i]);
+ first=0;
+ }
+}
/************************************************************************/
/************************************************************************/
Index: xen/include/asm-x86/x86_32/page.h
===================================================================
--- xen.orig/include/asm-x86/x86_32/page.h 2005-05-13 12:37:10.000000000 +0200
+++ xen/include/asm-x86/x86_32/page.h 2005-05-13 12:37:42.000000000 +0200
@@ -2,129 +2,38 @@
#ifndef __X86_32_PAGE_H__
#define __X86_32_PAGE_H__
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 22
-#define PAGE_SHIFT L1_PAGETABLE_SHIFT
-#define ROOT_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT
-
-#define PAGETABLE_ORDER 10
-#define L1_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
-#define L2_PAGETABLE_ENTRIES (1<<PAGETABLE_ORDER)
-#define ROOT_PAGETABLE_ENTRIES L2_PAGETABLE_ENTRIES
-
#define __PAGE_OFFSET (0xFF000000)
-#define PADDR_BITS 32
-#define VADDR_BITS 32
#define PADDR_MASK (~0UL)
#define VADDR_MASK (~0UL)
#define _PAGE_NX 0UL
#define PAGE_FLAG_MASK 0xfff
-#ifndef __ASSEMBLY__
#include <xen/config.h>
-#include <asm/types.h>
-typedef struct { u32 l1_lo; } l1_pgentry_t;
-typedef struct { u32 l2_lo; } l2_pgentry_t;
-typedef l2_pgentry_t root_pgentry_t;
-
-/* read access (deprecated) */
-#define l1e_get_value(_x) ((unsigned long)((_x).l1_lo))
-#define l2e_get_value(_x) ((unsigned long)((_x).l2_lo))
-
-/* read access */
-#define l1e_get_pfn(_x) ((unsigned long)((_x).l1_lo >> PAGE_SHIFT))
-#define l1e_get_phys(_x) ((unsigned long)((_x).l1_lo & PAGE_MASK))
-#define l1e_get_flags(_x) ((unsigned long)((_x).l1_lo & PAGE_FLAG_MASK))
-
-#define l2e_get_pfn(_x) ((unsigned long)((_x).l2_lo >> PAGE_SHIFT))
-#define l2e_get_phys(_x) ((unsigned long)((_x).l2_lo & PAGE_MASK))
-#define l2e_get_flags(_x) ((unsigned long)((_x).l2_lo & PAGE_FLAG_MASK))
-
-/* write access */
-static inline l1_pgentry_t l1e_empty(void)
-{
- l1_pgentry_t e = { .l1_lo = 0 };
- return e;
-}
-static inline l1_pgentry_t l1e_create_pfn(u32 pfn, u32 flags)
-{
- l1_pgentry_t e = { .l1_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l1_pgentry_t l1e_create_phys(u32 addr, u32 flags)
-{
- l1_pgentry_t e = { .l1_lo = (addr & PAGE_MASK) | flags };
- return e;
-}
-static inline void l1e_add_flags(l1_pgentry_t *e, u32 flags)
-{
- e->l1_lo |= flags;
-}
-static inline void l1e_remove_flags(l1_pgentry_t *e, u32 flags)
-{
- e->l1_lo &= ~flags;
-}
-
-static inline l2_pgentry_t l2e_empty(void)
-{
- l2_pgentry_t e = { .l2_lo = 0 };
- return e;
-}
-static inline l2_pgentry_t l2e_create_pfn(u32 pfn, u32 flags)
-{
- l2_pgentry_t e = { .l2_lo = (pfn << PAGE_SHIFT) | flags };
- return e;
-}
-static inline l2_pgentry_t l2e_create_phys(u32 addr, u32 flags)
-{
- l2_pgentry_t e = { .l2_lo = (addr & PAGE_MASK) | flags };
- return e;
-}
-static inline void l2e_add_flags(l2_pgentry_t *e, u32 flags)
-{
- e->l2_lo |= flags;
-}
-static inline void l2e_remove_flags(l2_pgentry_t *e, u32 flags)
-{
- e->l2_lo &= ~flags;
-}
-
-/* check entries */
-static inline int l1e_has_changed(l1_pgentry_t *e1, l1_pgentry_t *e2, u32 flags)
-{
- return ((e1->l1_lo ^ e2->l1_lo) & (PAGE_MASK | flags)) != 0;
-}
-static inline int l2e_has_changed(l2_pgentry_t *e1, l2_pgentry_t *e2, u32 flags)
-{
- return ((e1->l2_lo ^ e2->l2_lo) & (PAGE_MASK | flags)) != 0;
-}
-
-#endif /* !__ASSEMBLY__ */
+#ifdef CONFIG_X86_PAE
+# include <asm/x86_32/page-3l.h>
+#else
+# include <asm/x86_32/page-2l.h>
+#endif
/* Pagetable walking. */
#define l2e_to_l1e(_x) \
((l1_pgentry_t *)__va(l2e_get_phys(_x)))
+#define l3e_to_l2e(_x) \
+ ((l2_pgentry_t *)__va(l3e_get_phys(_x)))
/* Given a virtual address, get an entry offset into a page table. */
#define l1_table_offset(_a) \
(((_a) >> L1_PAGETABLE_SHIFT) & (L1_PAGETABLE_ENTRIES - 1))
#define l2_table_offset(_a) \
- ((_a) >> L2_PAGETABLE_SHIFT)
+ (((_a) >> L2_PAGETABLE_SHIFT) & (L2_PAGETABLE_ENTRIES - 1))
+#define l3_table_offset(_a) \
+ ((_a) >> L3_PAGETABLE_SHIFT)
/* Given a virtual address, get an entry offset into a linear page table. */
-#define l1_linear_offset(_a) ((_a) >> PAGE_SHIFT)
-
-#define is_guest_l1_slot(_s) (1)
-#define is_guest_l2_slot(_s) ((_s) < ROOT_PAGETABLE_FIRST_XEN_SLOT)
-
-#define root_get_pfn l2e_get_pfn
-#define root_get_flags l2e_get_flags
-#define root_get_value l2e_get_value
-#define root_empty l2e_empty
-#define root_create_phys l2e_create_phys
-#define PGT_root_page_table PGT_l2_page_table
+#define l1_linear_offset(_a) ((_a) >> L1_PAGETABLE_SHIFT)
+#define l2_linear_offset(_a) ((_a) >> L2_PAGETABLE_SHIFT)
#define L1_DISALLOW_MASK (3UL << 7)
#define L2_DISALLOW_MASK (7UL << 7)
Index: xen/arch/x86/x86_32/traps.c
===================================================================
--- xen.orig/arch/x86/x86_32/traps.c 2005-05-13 12:37:10.000000000 +0200
+++ xen/arch/x86/x86_32/traps.c 2005-05-13 12:37:42.000000000 +0200
@@ -160,21 +160,24 @@ void show_registers(struct cpu_user_regs
void show_page_walk(unsigned long addr)
{
- unsigned long page;
+ l2_pgentry_t pmd;
+ l1_pgentry_t *pte;
if ( addr < PAGE_OFFSET )
return;
printk("Pagetable walk from %08lx:\n", addr);
- page = l2e_get_value(idle_pg_table[l2_table_offset(addr)]);
- printk(" L2 = %08lx %s\n", page, (page & _PAGE_PSE) ? "(4MB)" : "");
- if ( !(page & _PAGE_PRESENT) || (page & _PAGE_PSE) )
+ pmd = idle_pg_table_l2[l2_linear_offset(addr)];
+ printk(" L2 = %08llx %s\n", (u64)l2e_get_value(pmd),
+ (l2e_get_flags(pmd) & _PAGE_PSE) ? "(2/4MB)" : "");
+ if ( !(l2e_get_flags(pmd) & _PAGE_PRESENT) ||
+ (l2e_get_flags(pmd) & _PAGE_PSE) )
return;
- page &= PAGE_MASK;
- page = ((unsigned long *) __va(page))[l1_table_offset(addr)];
- printk(" L1 = %08lx\n", page);
+ pte = __va(l2e_get_phys(pmd));
+ pte += l1_table_offset(addr);
+ printk(" L1 = %08llx\n", (u64)l1e_get_value(*pte));
}
#define DOUBLEFAULT_STACK_SIZE 1024
Index: xen/arch/x86/x86_32/mm.c
===================================================================
--- xen.orig/arch/x86/x86_32/mm.c 2005-05-13 12:37:10.000000000 +0200
+++ xen/arch/x86/x86_32/mm.c 2005-05-13 15:14:18.000000000 +0200
@@ -36,13 +36,21 @@ int map_pages(
unsigned long s,
unsigned long flags)
{
+#if CONFIG_PAGING_LEVELS == 3
+ l3_pgentry_t *pl3e;
+#endif
l2_pgentry_t *pl2e;
l1_pgentry_t *pl1e;
void *newpg;
while ( s != 0 )
{
+#if CONFIG_PAGING_LEVELS == 3
+ pl3e = &pt[l3_table_offset(v)];
+ pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);
+#else
pl2e = &pt[l2_table_offset(v)];
+#endif
if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
{
@@ -90,58 +98,83 @@ void __set_fixmap(
void __init paging_init(void)
{
void *ioremap_pt;
- unsigned long v;
+ unsigned long v,v2,i;
struct pfn_info *pg;
+#ifdef CONFIG_X86_PAE
+ printk("PAE enabled, limit: %d GB\n", MACHPHYS_MBYTES);
+#else
+ printk("PAE disabled.\n");
+#endif
+
/* Allocate and map the machine-to-phys table. */
- if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL )
- panic("Not enough memory to bootstrap Xen.\n");
- idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] =
- l2e_create_phys(page_to_phys(pg), __PAGE_HYPERVISOR | _PAGE_PSE);
- memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20);
+ /* Create read-only mapping of MPT for guest-OS use. */
+ for (v = RDWR_MPT_VIRT_START, v2 = RO_MPT_VIRT_START;
+ v != RDWR_MPT_VIRT_END /* && (max_page * 4) >= (v - RDWR_MPT_VIRT_START) */;
+ v += (1 << L2_PAGETABLE_SHIFT), v2 += (1 << L2_PAGETABLE_SHIFT)) {
+ if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER)) == NULL )
+ panic("Not enough memory to bootstrap Xen.\n");
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_phys(page_to_phys(pg),
+ __PAGE_HYPERVISOR | _PAGE_PSE);
+ idle_pg_table_l2[l2_linear_offset(v2)] =
+ l2e_create_phys(page_to_phys(pg),
+ (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW);
+ }
+ memset((void *)RDWR_MPT_VIRT_START, 0x55, v - RDWR_MPT_VIRT_START);
- /* Xen 4MB mappings can all be GLOBAL. */
+ /* Xen 2/4MB mappings can all be GLOBAL. */
if ( cpu_has_pge )
{
- for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
- {
- if (l2e_get_flags(idle_pg_table[l2_table_offset(v)]) & _PAGE_PSE)
- l2e_add_flags(&idle_pg_table[l2_table_offset(v)],
- _PAGE_GLOBAL);
+ for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) ) {
+ if (!(l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) & _PAGE_PSE))
+ continue;
+ if (v >= RO_MPT_VIRT_START && v < RO_MPT_VIRT_END)
+ continue;
+ l2e_add_flags(&idle_pg_table_l2[l2_linear_offset(v)],
+ _PAGE_GLOBAL);
}
}
- /* Create page table for ioremap(). */
- ioremap_pt = (void *)alloc_xenheap_page();
- clear_page(ioremap_pt);
- idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] =
- l2e_create_phys(__pa(ioremap_pt), __PAGE_HYPERVISOR);
-
- /* Create read-only mapping of MPT for guest-OS use.
- * NB. Remove the global bit so that shadow_mode_translate()==true domains
- * can reused this address space for their phys-to-machine mapping.
- */
- idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] =
- l2e_create_pfn(l2e_get_pfn(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]),
- l2e_get_flags(idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)])
- & ~(_PAGE_RW | _PAGE_GLOBAL));
+ /* Create page table(s) for ioremap(). */
+ for (v = IOREMAP_VIRT_START; v != IOREMAP_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) {
+ ioremap_pt = (void *)alloc_xenheap_page();
+ clear_page(ioremap_pt);
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_phys(__pa(ioremap_pt), __PAGE_HYPERVISOR);
+ }
/* Set up mapping cache for domain pages. */
- mapcache = (l1_pgentry_t *)alloc_xenheap_page();
- clear_page(mapcache);
- idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] =
- l2e_create_phys(__pa(mapcache), __PAGE_HYPERVISOR);
+ mapcache = (l1_pgentry_t*)alloc_xenheap_pages(10-PAGETABLE_ORDER);
+ for (v = MAPCACHE_VIRT_START, i = 0;
+ v != MAPCACHE_VIRT_END;
+ v += (1 << L2_PAGETABLE_SHIFT), i++) {
+ clear_page(mapcache + i*L1_PAGETABLE_ENTRIES);
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_phys(__pa(mapcache + i*L1_PAGETABLE_ENTRIES),
+ __PAGE_HYPERVISOR);
+ }
- /* Set up linear page table mapping. */
- idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] =
- l2e_create_phys(__pa(idle_pg_table), __PAGE_HYPERVISOR);
+ for (v = LINEAR_PT_VIRT_START; v != LINEAR_PT_VIRT_END; v += (1 << L2_PAGETABLE_SHIFT)) {
+ idle_pg_table_l2[l2_linear_offset(v)] =
+ l2e_create_phys(__pa(idle_pg_table_l2) + ((v-RDWR_MPT_VIRT_START) >> PAGETABLE_ORDER),
+ __PAGE_HYPERVISOR);
+ }
}
-void __init zap_low_mappings(void)
+void __init zap_low_mappings(l2_pgentry_t *base)
{
int i;
- for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
- idle_pg_table[i] = l2e_empty();
+ u32 addr;
+
+ for (i = 0; ; i++) {
+ addr = (i << L2_PAGETABLE_SHIFT);
+ if (addr >= HYPERVISOR_VIRT_START)
+ break;
+ if (l2e_get_phys(base[i]) != addr)
+ continue;
+ base[i] = l2e_empty();
+ }
flush_tlb_all_pge();
}
@@ -163,12 +196,13 @@ void subarch_init_memory(struct domain *
offsetof(struct pfn_info, count_info),
offsetof(struct pfn_info, u.inuse._domain),
sizeof(struct pfn_info));
- for ( ; ; ) ;
+ for ( ; ; )
+ __asm__ __volatile__ ( "hlt" );
}
/* M2P table is mappable read-only by privileged domains. */
m2p_start_mfn = l2e_get_pfn(
- idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
+ idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START)]);
for ( i = 0; i < 1024; i++ )
{
frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
@@ -320,7 +354,7 @@ void *memguard_init(void *heap_start)
l1[j] = l1e_create_phys((i << L2_PAGETABLE_SHIFT) |
(j << L1_PAGETABLE_SHIFT),
__PAGE_HYPERVISOR);
- idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
+ idle_pg_table_l2[i + l2_linear_offset(PAGE_OFFSET)] =
l2e_create_phys(virt_to_phys(l1), __PAGE_HYPERVISOR);
}
@@ -342,7 +376,7 @@ static void __memguard_change_range(void
while ( _l != 0 )
{
- l2 = &idle_pg_table[l2_table_offset(_p)];
+ l2 = &idle_pg_table_l2[l2_linear_offset(_p)];
l1 = l2e_to_l1e(*l2) + l1_table_offset(_p);
if ( guard )
l1e_remove_flags(l1, _PAGE_PRESENT);
Index: xen/include/public/arch-x86_32.h
===================================================================
--- xen.orig/include/public/arch-x86_32.h	2005-05-13 12:37:11.000000000 +0200
+++ xen/include/public/arch-x86_32.h 2005-05-13 12:37:42.000000000 +0200
@@ -72,7 +72,11 @@
* Virtual addresses beyond this are not modifiable by guest OSes. The
* machine->physical mapping table starts at this address, read-only.
*/
-#define HYPERVISOR_VIRT_START (0xFC000000UL)
+#ifdef CONFIG_X86_PAE
+# define HYPERVISOR_VIRT_START (0xF5800000UL)
+#else
+# define HYPERVISOR_VIRT_START (0xFC000000UL)
+#endif
#ifndef machine_to_phys_mapping
#define machine_to_phys_mapping ((u32 *)HYPERVISOR_VIRT_START)
#endif
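
(For the arithmetic: 0x100000000 - 0xF5800000 = 0xA800000, i.e. the
hypervisor keeps 168MB in PAE mode instead of the 64MB above 0xFC000000,
which pays for the doubled linear pagetable areas and the larger M2P and
frame tables.)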
Index: xen/common/elf.c
===================================================================
--- xen.orig/common/elf.c 2005-05-13 12:37:10.000000000 +0200
+++ xen/common/elf.c 2005-05-13 12:37:42.000000000 +0200
@@ -82,7 +82,20 @@ int parseelfimage(struct domain_setup_in
printk("ERROR: Xen will only load images built for Xen v3.0\n");
return -EINVAL;
}
-
+#if defined(__i386__)
+        {
+#ifdef CONFIG_X86_PAE
+            const int xen_pae = 1;
+#else
+            const int xen_pae = 0;
+#endif
+            int guest_pae = (strstr(guestinfo, "PAE=yes") != NULL);
+
+            if ( xen_pae != guest_pae )
+            {
+                printk("ERROR: PAE mode mismatch (xen=%s,guest=%s)\n",
+                       xen_pae ? "yes" : "no",
+                       guest_pae ? "yes" : "no");
+                return -EINVAL;
+            }
+        }
+#endif
break;
}
if ( guestinfo == NULL )
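
The guest side advertises this via its __xen_guest info string; a PAE
kernel build would carry a marker roughly like the following (only the
"PAE=yes" substring matters to the check above, the other fields are
illustrative):

    GUEST_OS=linux,GUEST_VER=2.6,XEN_VER=3.0,PAE=yes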
Index: xen/arch/x86/x86_32/domain_page.c
===================================================================
--- xen.orig/arch/x86/x86_32/domain_page.c	2005-05-13 12:37:11.000000000 +0200
+++ xen/arch/x86/x86_32/domain_page.c 2005-05-13 12:37:42.000000000 +0200
@@ -72,7 +72,7 @@ void *map_domain_mem(unsigned long pa)
shadow_epoch[cpu] = ++epoch;
}
}
- while ( l1e_get_value(cache[idx]) != 0 );
+ while ( l1e_get_flags(cache[idx]) & _PAGE_PRESENT );
cache[idx] = l1e_create_phys(pa, __PAGE_HYPERVISOR);
Index: xen/include/asm-x86/mm.h
===================================================================
--- xen.orig/include/asm-x86/mm.h 2005-05-13 12:37:10.000000000 +0200
+++ xen/include/asm-x86/mm.h 2005-05-13 12:37:42.000000000 +0200
@@ -76,15 +76,15 @@ struct pfn_info
/* Owning guest has pinned this page to its current type? */
#define _PGT_pinned 27
#define PGT_pinned (1U<<_PGT_pinned)
- /* The 10 most significant bits of virt address if this is a page table. */
-#define PGT_va_shift 17
-#define PGT_va_mask (((1U<<10)-1)<<PGT_va_shift)
+ /* The 11 most significant bits of virt address if this is a page table. */
+#define PGT_va_shift 16
+#define PGT_va_mask (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer still mutable (i.e. not fixed yet)? */
-#define PGT_va_mutable (((1U<<10)-1)<<PGT_va_shift)
+#define PGT_va_mutable (((1U<<11)-1)<<PGT_va_shift)
/* Is the back pointer unknown (e.g., p.t. is mapped at multiple VAs)? */
-#define PGT_va_unknown (((1U<<10)-2)<<PGT_va_shift)
- /* 17-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<17)-1)
+#define PGT_va_unknown (((1U<<11)-2)<<PGT_va_shift)
+ /* 16-bit count of uses of this frame as its current type. */
+#define PGT_count_mask ((1U<<16)-1)
#define PGT_mfn_mask ((1U<<20)-1) /* mfn mask for shadow types */
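
The shift moves because the va back-pointer and the type count share the
32-bit type_info word with the flag bits above: a PAE l1 table covers 2MB
instead of 4MB, so naming its slot takes 32 - L2_PAGETABLE_SHIFT =
32 - 21 = 11 bits, and the count loses one bit to make room.  Rough sketch
of the encoding, assuming these semantics:

    /* pack: remember at which va slot this l1 table is mapped */
    type_info |= (va >> L2_PAGETABLE_SHIFT) << PGT_va_shift;

    /* unpack: recover the 2MB-granular slot number */
    slot = (type_info & PGT_va_mask) >> PGT_va_shift;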
Index: xen/arch/x86/shadow.c
===================================================================
--- xen.orig/arch/x86/shadow.c 2005-05-13 12:37:11.000000000 +0200
+++ xen/arch/x86/shadow.c 2005-05-13 12:55:14.000000000 +0200
@@ -358,13 +358,13 @@ free_shadow_hl2_table(struct domain *d,
}
static void inline
-free_shadow_l2_table(struct domain *d, unsigned long smfn)
+free_shadow_l2_table(struct domain *d, unsigned long smfn, unsigned int type)
{
l2_pgentry_t *pl2e = map_domain_mem(smfn << PAGE_SHIFT);
int i, external = shadow_mode_external(d);
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
- if ( external || is_guest_l2_slot(i) )
+ if ( external || is_guest_l2_slot(type, i) )
if ( l2e_get_flags(pl2e[i]) & _PAGE_PRESENT )
put_shadow_ref(l2e_get_pfn(pl2e[i]));
@@ -404,7 +404,7 @@ void free_shadow_page(unsigned long smfn
case PGT_l2_shadow:
perfc_decr(shadow_l2_pages);
shadow_demote(d, gpfn, gmfn);
- free_shadow_l2_table(d, smfn);
+ free_shadow_l2_table(d, smfn, page->u.inuse.type_info);
break;
case PGT_hl2_shadow:
@@ -1093,7 +1093,8 @@ translate_l1pgtable(struct domain *d, l1
// up dom0.
//
void
-translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn)
+translate_l2pgtable(struct domain *d, l1_pgentry_t *p2m, unsigned long l2mfn,
+ unsigned int type)
{
int i;
l2_pgentry_t *l2;
@@ -1103,7 +1104,7 @@ translate_l2pgtable(struct domain *d, l1
l2 = map_domain_mem(l2mfn << PAGE_SHIFT);
for (i = 0; i < L2_PAGETABLE_ENTRIES; i++)
{
- if ( is_guest_l2_slot(i) &&
+ if ( is_guest_l2_slot(type, i) &&
(l2e_get_flags(l2[i]) & _PAGE_PRESENT) )
{
unsigned long mfn = l2e_get_pfn(l2[i]);
@@ -1409,8 +1410,8 @@ gpfn_to_mfn_foreign(struct domain *d, un
unmap_domain_mem(l2);
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
{
- printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%lx\n",
- d->domain_id, gpfn, l2e_get_value(l2e));
+ printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l2e=%llx\n",
+ d->domain_id, gpfn, (u64)l2e_get_value(l2e));
return INVALID_MFN;
}
unsigned long l1tab = l2e_get_phys(l2e);
@@ -1425,8 +1426,8 @@ gpfn_to_mfn_foreign(struct domain *d, un
if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) )
{
- printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%lx\n",
- d->domain_id, gpfn, l1e_get_value(l1e));
+ printk("gpfn_to_mfn_foreign(d->id=%d, gpfn=%lx) => 0 l1e=%llx\n",
+ d->domain_id, gpfn, (u64)l1e_get_value(l1e));
return INVALID_MFN;
}
@@ -2389,7 +2390,10 @@ static int resync_all(struct domain *d,
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
- if ( !is_guest_l2_slot(i) && !external )
+#ifdef CONFIG_X86_PAE
+            BUG(); /* FIXME: is_guest_l2_slot() needs the page's type_info */
+#endif
+            if ( !is_guest_l2_slot(0, i) && !external )
continue;
l2_pgentry_t new_pde = guest2[i];
@@ -2432,7 +2436,10 @@ static int resync_all(struct domain *d,
changed = 0;
for ( i = 0; i < L2_PAGETABLE_ENTRIES; i++ )
{
- if ( !is_guest_l2_slot(i) && !external )
+#ifdef CONFIG_X86_PAE
+            BUG(); /* FIXME: is_guest_l2_slot() needs the page's type_info */
+#endif
+            if ( !is_guest_l2_slot(0, i) && !external )
continue;
l2_pgentry_t new_pde = guest2[i];
@@ -2645,8 +2652,8 @@ int shadow_fault(unsigned long va, struc
&gpte, sizeof(gpte))) )
{
printk("%s() failed, crashing domain %d "
- "due to a read-only L2 page table (gpde=%lx), va=%lx\n",
- __func__, d->domain_id, l2e_get_value(gpde), va);
+ "due to a read-only L2 page table (gpde=%llx), va=%lx\n",
+ __func__,d->domain_id, (u64)l2e_get_value(gpde), va);
domain_crash_synchronous();
}
@@ -2719,7 +2726,7 @@ void shadow_l2_normal_pt_update(
shadow_unlock(d);
}
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
void shadow_l3_normal_pt_update(
struct domain *d,
unsigned long pa, l3_pgentry_t gpde,
@@ -2727,7 +2734,9 @@ void shadow_l3_normal_pt_update(
{
BUG(); // not yet implemented
}
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
void shadow_l4_normal_pt_update(
struct domain *d,
unsigned long pa, l4_pgentry_t gpde,
Index: xen/include/asm-x86/types.h
===================================================================
--- xen.orig/include/asm-x86/types.h 2005-05-13 12:37:10.000000000 +0200
+++ xen/include/asm-x86/types.h 2005-05-13 12:37:42.000000000 +0200
@@ -44,11 +44,17 @@ typedef signed long long s64;
typedef unsigned long long u64;
#define BITS_PER_LONG 32
typedef unsigned int size_t;
+#if defined(CONFIG_X86_PAE)
+typedef u64 physaddr_t;
+#else
+typedef u32 physaddr_t;
+#endif
#elif defined(__x86_64__)
typedef signed long s64;
typedef unsigned long u64;
#define BITS_PER_LONG 64
typedef unsigned long size_t;
+typedef u64 physaddr_t;
#endif
/* DMA addresses come in generic and 64-bit flavours. */
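
Code printing pagetable entries or physical addresses has to cope with
physaddr_t switching width, hence the (u64) casts and %llx formats in the
shadow.c hunks above.  The same pattern works anywhere (sketch):

    physaddr_t pa = l2e_get_phys(l2e);   /* 32 or 64 bit, per build */
    printk("pa=%llx\n", (u64)pa);        /* one format string either way */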
Index: xen/include/asm-x86/shadow.h
===================================================================
--- xen.orig/include/asm-x86/shadow.h 2005-05-13 12:37:10.000000000 +0200
+++ xen/include/asm-x86/shadow.h 2005-05-13 12:51:23.000000000 +0200
@@ -126,10 +126,12 @@ extern void shadow_l1_normal_pt_update(s
extern void shadow_l2_normal_pt_update(struct domain *d,
unsigned long pa, l2_pgentry_t l2e,
struct map_dom_mem_cache *cache);
-#ifdef __x86_64__
+#if CONFIG_PAGING_LEVELS >= 3
extern void shadow_l3_normal_pt_update(struct domain *d,
unsigned long pa, l3_pgentry_t l3e,
struct map_dom_mem_cache *cache);
+#endif
+#if CONFIG_PAGING_LEVELS >= 4
extern void shadow_l4_normal_pt_update(struct domain *d,
unsigned long pa, l4_pgentry_t l4e,
struct map_dom_mem_cache *cache);
Index: xen/include/asm-x86/smp.h
===================================================================
--- xen.orig/include/asm-x86/smp.h 2005-05-13 12:37:10.000000000 +0200
+++ xen/include/asm-x86/smp.h 2005-05-13 12:47:49.000000000 +0200
@@ -38,7 +38,7 @@ extern cpumask_t cpu_sibling_map[];
extern void smp_flush_tlb(void);
extern void smp_invalidate_rcv(void); /* Process an NMI */
extern void (*mtrr_hook) (void);
-extern void zap_low_mappings (void);
+extern void zap_low_mappings(l2_pgentry_t *base);
#define MAX_APICID 256
extern u8 x86_cpu_to_apicid[];