The 1:1 page table should be a 3-level PAE page table on x86-64.
This is needed to support machine physical addresses above 4GB.
Signed-off-by: Chengyuan Li <chengyuan.li@xxxxxxxxx>
Signed-off-by: Arun Sharma <arun.sharma@xxxxxxxxx>
--- a/tools/libxc/xc_vmx_build.c Mon Jul 11 05:02:12 2005
+++ b/tools/libxc/xc_vmx_build.c Mon Jul 11 05:04:22 2005
@@ -13,6 +13,9 @@
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#ifdef __x86_64__
+#define L3_PROT (_PAGE_PRESENT)
+#endif
#define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)
@@ -91,6 +94,7 @@
mem_mapp->nr_map = nr_map;
}
+#ifdef __i386__
static int zap_mmio_range(int xc_handle, u32 dom,
l2_pgentry_32_t *vl2tab,
unsigned long mmio_range_start,
@@ -138,6 +142,65 @@
munmap(vl2tab, PAGE_SIZE);
return 0;
}
+#else
+/*
+ * Clear the L1 page-table entry of every page in one MMIO range.
+ *
+ * For each page-aligned address in
+ * [mmio_range_start & PAGE_MASK, mmio_range_start + mmio_range_size)
+ * the L3 -> L2 -> L1 tables of domain 'dom' are walked by mapping each
+ * level with xc_map_foreign_range(), and the L1 entry for that address
+ * is set to 0.  Addresses whose L3 or L2 entry is 0 are skipped.
+ *
+ * vl3tab is the caller's mapping of the top-level (L3) table; it is only
+ * read here and is not unmapped.
+ *
+ * Returns 0 on success, -1 if a page-table page could not be mapped.
+ * Every mapping made by this function is released before it returns.
+ */
+static int zap_mmio_range(int xc_handle, u32 dom,
+                          l3_pgentry_t *vl3tab,
+                          unsigned long mmio_range_start,
+                          unsigned long mmio_range_size)
+{
+    unsigned long mmio_addr;
+    unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+    unsigned long vl2e;
+    unsigned long vl3e;
+    l1_pgentry_t *vl1tab;
+    l2_pgentry_t *vl2tab;
+
+    mmio_addr = mmio_range_start & PAGE_MASK;
+    for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
+        vl3e = vl3tab[l3_table_offset(mmio_addr)];
+        if (vl3e == 0)
+            continue;
+
+        vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ|PROT_WRITE,
+                                      vl3e >> PAGE_SHIFT);
+        if (vl2tab == NULL) {
+            PERROR("Failed zap MMIO range");
+            return -1;
+        }
+
+        /*
+         * Only the entry value is needed from the L2 table, so copy it
+         * and drop the mapping straight away.  Unmapping here (rather
+         * than at the bottom of the loop) keeps the "entry is zero"
+         * path below from leaking one mapping per page.
+         */
+        vl2e = vl2tab[l2_table_offset(mmio_addr)];
+        munmap(vl2tab, PAGE_SIZE);
+        if (vl2e == 0)
+            continue;
+
+        vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                      PROT_READ|PROT_WRITE,
+                                      vl2e >> PAGE_SHIFT);
+        if (vl1tab == NULL) {
+            /* Previously unchecked: a failed map was dereferenced below. */
+            PERROR("Failed zap MMIO range");
+            return -1;
+        }
+
+        vl1tab[l1_table_offset(mmio_addr)] = 0;
+        munmap(vl1tab, PAGE_SIZE);
+    }
+    return 0;
+}
+
+/*
+ * Zap the page-table entries of every uncached I/O region in a memory map.
+ *
+ * Maps the guest's top-level (L3) page-table page, whose address is given
+ * by 'l3tab' (converted to a frame number with >> PAGE_SHIFT), then calls
+ * zap_mmio_range() for each entry of 'mem_mapp' whose type is E820_IO and
+ * whose caching attribute is MEMMAP_UC.
+ *
+ * Returns 0 on success, -1 if the L3 table cannot be mapped or if any
+ * zap_mmio_range() call fails.  The L3 mapping is released on every
+ * return path once it has been established.
+ */
+static int zap_mmio_ranges(int xc_handle, u32 dom,
+                           unsigned long l3tab,
+                           struct mem_map *mem_mapp)
+{
+    int i;
+    int rc = 0;
+    l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                                PROT_READ|PROT_WRITE,
+                                                l3tab >> PAGE_SHIFT);
+    if (vl3tab == NULL)
+        return -1;
+
+    for (i = 0; i < mem_mapp->nr_map; i++) {
+        /* Only uncached I/O regions are zapped; skip everything else. */
+        if ((mem_mapp->map[i].type != E820_IO)
+            || (mem_mapp->map[i].caching_attr != MEMMAP_UC))
+            continue;
+
+        if (zap_mmio_range(xc_handle, dom, vl3tab,
+                           mem_mapp->map[i].addr,
+                           mem_mapp->map[i].size) == -1) {
+            /* Stop at the first failure but still unmap vl3tab below. */
+            rc = -1;
+            break;
+        }
+    }
+
+    munmap(vl3tab, PAGE_SIZE);
+    return rc;
+}
+
+#endif
static int setup_guest(int xc_handle,
u32 dom, int memsize,
@@ -151,9 +214,13 @@
unsigned long flags,
struct mem_map * mem_mapp)
{
- l1_pgentry_32_t *vl1tab=NULL, *vl1e=NULL;
- l2_pgentry_32_t *vl2tab=NULL, *vl2e=NULL;
+ l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+ l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
unsigned long *page_array = NULL;
+#ifdef __x86_64__
+ l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
+ unsigned long l3tab;
+#endif
unsigned long l2tab;
unsigned long l1tab;
unsigned long count, i;
@@ -212,7 +279,11 @@
if(initrd_len == 0)
vinitrd_start = vinitrd_end = 0;
+#ifdef __i386__
nr_pt_pages = 1 + ((memsize + 3) >> 2);
+#else
+ nr_pt_pages = 5 + ((memsize + 1) >> 1);
+#endif
vpt_start = v_end;
vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);
@@ -274,6 +345,7 @@
if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
goto error_out;
+#ifdef __i386__
/* First allocate page for page dir. */
ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
@@ -310,7 +382,64 @@
}
munmap(vl1tab, PAGE_SIZE);
munmap(vl2tab, PAGE_SIZE);
-
+#else
+ /* First allocate pdpt */
+ ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+ /* here l3tab means pdpt, only 4 entry is used */
+ l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+ ctxt->ctrlreg[3] = l3tab;
+
+ /* Initialise the page tables. */
+ if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l3tab >> PAGE_SHIFT)) == NULL )
+ goto error_out;
+ memset(vl3tab, 0, PAGE_SIZE);
+
+ vl3e = &vl3tab[l3_table_offset(dsi.v_start)];
+
+ for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+ {
+ if (!(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))){
+ l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+
+ if (vl2tab != NULL)
+ munmap(vl2tab, PAGE_SIZE);
+
+ if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l2tab >> PAGE_SHIFT)) == NULL )
+ goto error_out;
+
+ memset(vl2tab, 0, PAGE_SIZE);
+ *vl3e++ = l2tab | L3_PROT;
+ vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count <<
PAGE_SHIFT))];
+ }
+ if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+ {
+ l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+ if ( vl1tab != NULL )
+ munmap(vl1tab, PAGE_SIZE);
+ if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+ PROT_READ|PROT_WRITE,
+ l1tab >> PAGE_SHIFT)) == NULL )
+ {
+ munmap(vl2tab, PAGE_SIZE);
+ goto error_out;
+ }
+ memset(vl1tab, 0, PAGE_SIZE);
+ vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+ *vl2e++ = l1tab | L2_PROT;
+ }
+
+ *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+ vl1e++;
+ }
+
+ munmap(vl1tab, PAGE_SIZE);
+ munmap(vl2tab, PAGE_SIZE);
+ munmap(vl3tab, PAGE_SIZE);
+#endif
/* Write the machine->phys table entries. */
for ( count = 0; count < nr_pages; count++ )
{
@@ -325,6 +454,7 @@
xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
goto error_out;
+
memset(boot_paramsp, 0, sizeof(*boot_paramsp));
strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
@@ -381,7 +511,11 @@
/* memsize is in megabytes */
build_e820map(mem_mapp, memsize << 20);
+#if defined (__i386__)
if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
+#else
+ if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
+#endif
goto error_out;
boot_paramsp->e820_map_nr = mem_mapp->nr_map;
for (i=0; i<mem_mapp->nr_map; i++) {
diff -r 036c6e463f67 -r 51dd38b2b917 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c Mon Jul 11 05:02:12 2005
+++ b/xen/arch/x86/vmx.c Mon Jul 11 05:04:22 2005
@@ -801,7 +801,11 @@
skip_cr3:
error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+#if defined (__i386__)
error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE));
+#else
+ error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE | X86_CR4_PAE));
+#endif
error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
@@ -860,7 +864,7 @@
{
struct vmx_assist_context c;
u32 magic;
- unsigned long cp;
+ u32 cp;
/* make sure vmxassist exists (this is not an error) */
if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
@@ -1191,7 +1195,7 @@
__vmread(CR4_READ_SHADOW, &old_cr);
if (pae_disabled)
- __vmwrite(GUEST_CR4, ((value & ~X86_CR4_PAE) | X86_CR4_VMXE));
+ __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
else
__vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
diff -r 036c6e463f67 -r 51dd38b2b917 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c Mon Jul 11 05:02:12 2005
+++ b/xen/arch/x86/vmx_vmcs.c Mon Jul 11 05:04:22 2005
@@ -122,6 +122,7 @@
struct e820entry *e820p;
unsigned long gpfn = 0;
+ local_flush_tlb_pge();
regs->ebx = 0; /* Linux expects ebx to be 0 for boot proc */
n = regs->ecx;
@@ -311,8 +312,7 @@
error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
/* CR3 is set in vmx_final_setup_guest */
#ifdef __x86_64__
- error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PAE);
- printk("construct_init_vmcs_guest: guest CR4 is %lx\n", host_env->cr4 );
+ error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
#else
error |= __vmwrite(GUEST_CR4, host_env->cr4);
#endif
diff -r 036c6e463f67 -r 51dd38b2b917 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py Mon Jul 11 05:02:12 2005
+++ b/tools/python/xen/xend/image.py Mon Jul 11 05:04:22 2005
@@ -351,6 +351,8 @@
@param mem_mb: size in MB
@return size in KB
"""
- # Logic x86-32 specific.
# 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
- return (1 + ((mem_mb + 3) >> 2)) * 4
+ if os.uname()[4] == 'x86_64':
+ return (5 + ((mem_mb + 1) >> 1)) * 4
+ else:
+ return (1 + ((mem_mb + 3) >> 2)) * 4
diff -r 036c6e463f67 -r 51dd38b2b917 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h Mon Jul 11 05:02:12 2005
+++ b/xen/include/asm-x86/mm.h Mon Jul 11 05:04:22 2005
@@ -349,4 +349,7 @@
l1_pgentry_t _nl1e,
struct domain *d,
struct vcpu *v);
+
+void alloc_monitor_pagetable(struct vcpu *v);
+
#endif /* __ASM_X86_MM_H__ */
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|