To: xen-changelog@xxxxxxxxxxxxxxxxxxx
Subject: [Xen-changelog] The 1:1 page table should be a 3 level PAE page table on x86-64
From: Xen patchbot -unstable <patchbot-unstable@xxxxxxxxxxxxxxxxxxx>
Date: Mon, 11 Jul 2005 10:44:10 -0400
Delivery-date: Mon, 11 Jul 2005 14:44:36 +0000
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-changelog-request@lists.xensource.com?subject=help>
List-id: BK change log <xen-changelog.lists.xensource.com>
List-post: <mailto:xen-changelog@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/cgi-bin/mailman/listinfo/xen-changelog>, <mailto:xen-changelog-request@lists.xensource.com?subject=unsubscribe>
Reply-to: xen-devel@xxxxxxxxxxxxxxxxxxx
Sender: xen-changelog-bounces@xxxxxxxxxxxxxxxxxxx
# HG changeset patch
# User kaf24@xxxxxxxxxxxxxxxxxxxx
# Node ID ba925b4aef289630b4e2102f56b07c6c603c4058
# Parent  a29b4174d39c403cffcc99229a77bfc3aa9768e3

The 1:1 page table should be a 3 level PAE page table on x86-64 

This is needed to support > 4GB machine physical addresses.
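
For background (not part of the patch): with a 3-level PAE table, each
page-table entry is 64 bits wide, so entries can hold machine addresses
above 4GB, and a 32-bit guest-physical address splits 2/9/9/12 across
the PDPT, L2, and L1 tables. A minimal standalone sketch of that split,
using the standard PAE shift constants rather than anything defined in
this patch:

    #include <stdint.h>
    #include <stdio.h>

    #define L1_SHIFT 12   /* 4KB pages */
    #define L2_SHIFT 21   /* one L1 page maps 512 * 4KB = 2MB */
    #define L3_SHIFT 30   /* one L2 page maps 512 * 2MB = 1GB */

    int main(void)
    {
        uint32_t gpa = 0xdeadb000;     /* example guest-physical address */
        printf("pdpt %u, l2 %u, l1 %u\n",
               (gpa >> L3_SHIFT) & 3,  /* PDPT has only 4 entries */
               (gpa >> L2_SHIFT) & 511,
               (gpa >> L1_SHIFT) & 511);
        return 0;
    }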

Signed-off-by: Chengyuan Li <chengyuan.li@xxxxxxxxx>
Signed-off-by: Arun Sharma <arun.sharma@xxxxxxxxx>
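
For reference, the nr_pt_pages arithmetic below appears to work out as
follows: on x86-64 the 1:1 map needs 1 PDPT page plus 4 L2 pages (hence
the constant 5), plus one L1 page per 2MB of guest memory, since each
L1 page holds 512 8-byte entries mapping 512 * 4KB = 2MB; on i386 it is
1 page directory plus one L1 page per 4MB (1024 4-byte entries). A
hypothetical helper (not in the patch) restating both formulas:

    /* Pages of guest page tables for a guest of memsize_mb megabytes;
     * mirrors the expressions in xc_vmx_build.c and image.py below. */
    static unsigned long nr_pt_pages_for(int memsize_mb, int is_x86_64)
    {
        if (is_x86_64)
            return 5 + ((memsize_mb + 1) >> 1);  /* round up to 2MB */
        return 1 + ((memsize_mb + 3) >> 2);      /* round up to 4MB */
    }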

diff -r a29b4174d39c -r ba925b4aef28 xen/arch/x86/shadow_public.c
--- a/xen/arch/x86/shadow_public.c      Mon Jul 11 10:23:19 2005
+++ b/xen/arch/x86/shadow_public.c      Mon Jul 11 14:39:10 2005
@@ -162,7 +162,7 @@
     return mk_pagetable(page_to_phys(l4page));
 }
 
-void alloc_monitor_pagetable(struct vcpu *v)
+static void alloc_monitor_pagetable(struct vcpu *v)
 {
     unsigned long mmfn;
     l4_pgentry_t *mpl4e;
diff -r a29b4174d39c -r ba925b4aef28 tools/libxc/xc_vmx_build.c
--- a/tools/libxc/xc_vmx_build.c        Mon Jul 11 10:23:19 2005
+++ b/tools/libxc/xc_vmx_build.c        Mon Jul 11 14:39:10 2005
@@ -13,6 +13,9 @@
 
 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
+#ifdef __x86_64__
+#define L3_PROT (_PAGE_PRESENT)
+#endif
 
 #define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
 #define round_pgdown(_p)  ((_p)&PAGE_MASK)
@@ -91,6 +94,7 @@
     mem_mapp->nr_map = nr_map;
 }
 
+#ifdef __i386__
 static int zap_mmio_range(int xc_handle, u32 dom,
                             l2_pgentry_32_t *vl2tab,
                             unsigned long mmio_range_start,
@@ -138,6 +142,65 @@
     munmap(vl2tab, PAGE_SIZE);
     return 0;
 }
+#else
+static int zap_mmio_range(int xc_handle, u32 dom,
+                           l3_pgentry_t *vl3tab,
+                           unsigned long mmio_range_start,
+                           unsigned long mmio_range_size)
+{
+   unsigned long mmio_addr;
+   unsigned long mmio_range_end = mmio_range_start + mmio_range_size;
+   unsigned long vl2e = 0;
+   unsigned long vl3e;
+   l1_pgentry_t *vl1tab;
+   l2_pgentry_t *vl2tab;
+ 
+   mmio_addr = mmio_range_start & PAGE_MASK;
+   for (; mmio_addr < mmio_range_end; mmio_addr += PAGE_SIZE) {
+       vl3e = vl3tab[l3_table_offset(mmio_addr)];
+       if (vl3e == 0)
+           continue;
+       vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+               PROT_READ|PROT_WRITE, vl3e >> PAGE_SHIFT);
+       if (vl2tab == 0) {
+           PERROR("Failed zap MMIO range");
+           return -1;
+       }
+       vl2e = vl2tab[l2_table_offset(mmio_addr)];
+       if (vl2e == 0)
+           continue;
+       vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+               PROT_READ|PROT_WRITE, vl2e >> PAGE_SHIFT);
+
+       vl1tab[l1_table_offset(mmio_addr)] = 0;
+       munmap(vl2tab, PAGE_SIZE);
+       munmap(vl1tab, PAGE_SIZE);
+   }
+   return 0;
+}
+
+static int zap_mmio_ranges(int xc_handle, u32 dom,
+                           unsigned long l3tab,
+                           struct mem_map *mem_mapp)
+{
+   int i;
+   l3_pgentry_t *vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                                               PROT_READ|PROT_WRITE,
+                                               l3tab >> PAGE_SHIFT);
+   if (vl3tab == 0)
+       return -1;
+   for (i = 0; i < mem_mapp->nr_map; i++) {
+       if ((mem_mapp->map[i].type == E820_IO)
+         && (mem_mapp->map[i].caching_attr == MEMMAP_UC))
+           if (zap_mmio_range(xc_handle, dom, vl3tab,
+                       mem_mapp->map[i].addr, mem_mapp->map[i].size) == -1)
+               return -1;
+   }
+   munmap(vl3tab, PAGE_SIZE);
+   return 0;
+}
+
+#endif
 
 static int setup_guest(int xc_handle,
                          u32 dom, int memsize,
@@ -151,9 +214,13 @@
                          unsigned long flags,
                          struct mem_map * mem_mapp)
 {
-    l1_pgentry_32_t *vl1tab=NULL, *vl1e=NULL;
-    l2_pgentry_32_t *vl2tab=NULL, *vl2e=NULL;
+    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
+    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
     unsigned long *page_array = NULL;
+#ifdef __x86_64__
+    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
+    unsigned long l3tab;
+#endif
     unsigned long l2tab;
     unsigned long l1tab;
     unsigned long count, i;
@@ -212,7 +279,11 @@
     if(initrd_len == 0)
         vinitrd_start = vinitrd_end = 0;
 
+#ifdef __i386__
     nr_pt_pages = 1 + ((memsize + 3) >> 2);
+#else
+    nr_pt_pages = 5 + ((memsize + 1) >> 1);
+#endif
     vpt_start   = v_end;
     vpt_end     = vpt_start + (nr_pt_pages * PAGE_SIZE);
 
@@ -274,6 +345,7 @@
     if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
         goto error_out;
 
+#ifdef __i386__
     /* First allocate page for page dir. */
     ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
     l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
@@ -310,7 +382,64 @@
     }
     munmap(vl1tab, PAGE_SIZE);
     munmap(vl2tab, PAGE_SIZE);
-
+#else
+    /* First allocate pdpt */
+    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
+    /* here l3tab means pdpt, only 4 entry is used */
+    l3tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+    ctxt->ctrlreg[3] = l3tab;
+
+    /* Initialise the page tables. */
+    if ( (vl3tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, 
+                                        PROT_READ|PROT_WRITE, 
+                                        l3tab >> PAGE_SHIFT)) == NULL )
+        goto error_out;
+    memset(vl3tab, 0, PAGE_SIZE);
+
+    vl3e = &vl3tab[l3_table_offset(dsi.v_start)];
+
+    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
+    {
+        if (!(count % (1 << (L3_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)))){
+            l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+
+            if (vl2tab != NULL)
+                munmap(vl2tab, PAGE_SIZE);
+
+            if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                      PROT_READ|PROT_WRITE,
+                      l2tab >> PAGE_SHIFT)) == NULL )
+                goto error_out;
+
+            memset(vl2tab, 0, PAGE_SIZE);
+            *vl3e++ = l2tab | L3_PROT;
+            vl2e = &vl2tab[l2_table_offset(dsi.v_start + (count << PAGE_SHIFT))];
+        }
+        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
+        {
+            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
+            if ( vl1tab != NULL )
+                munmap(vl1tab, PAGE_SIZE);
+            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
+                      PROT_READ|PROT_WRITE,
+                      l1tab >> PAGE_SHIFT)) == NULL )
+            {
+                munmap(vl2tab, PAGE_SIZE);
+                goto error_out;
+            }
+            memset(vl1tab, 0, PAGE_SIZE);
+            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
+            *vl2e++ = l1tab | L2_PROT;
+        }
+
+        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
+        vl1e++;
+    }
+
+    munmap(vl1tab, PAGE_SIZE);
+    munmap(vl2tab, PAGE_SIZE);
+    munmap(vl3tab, PAGE_SIZE);
+#endif
     /* Write the machine->phys table entries. */
     for ( count = 0; count < nr_pages; count++ )
     {
@@ -325,6 +454,7 @@
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT])) == 0)
         goto error_out;
+
     memset(boot_paramsp, 0, sizeof(*boot_paramsp));
 
     strncpy((char *)boot_paramsp->cmd_line, cmdline, 0x800);
@@ -381,7 +511,11 @@
 
     /* memsize is in megabytes */
     build_e820map(mem_mapp, memsize << 20);
+#if defined (__i386__)
     if (zap_mmio_ranges(xc_handle, dom, l2tab, mem_mapp) == -1)
+#else
+    if (zap_mmio_ranges(xc_handle, dom, l3tab, mem_mapp) == -1)
+#endif
        goto error_out;
     boot_paramsp->e820_map_nr = mem_mapp->nr_map;
     for (i=0; i<mem_mapp->nr_map; i++) {
diff -r a29b4174d39c -r ba925b4aef28 xen/arch/x86/vmx.c
--- a/xen/arch/x86/vmx.c        Mon Jul 11 10:23:19 2005
+++ b/xen/arch/x86/vmx.c        Mon Jul 11 14:39:10 2005
@@ -801,7 +801,11 @@
 skip_cr3:
 
     error |= __vmread(CR4_READ_SHADOW, &old_cr4);
+#if defined (__i386__)
     error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE));
+#else
+    error |= __vmwrite(GUEST_CR4, (c->cr4 | X86_CR4_VMXE | X86_CR4_PAE));
+#endif
     error |= __vmwrite(CR4_READ_SHADOW, c->cr4);
 
     error |= __vmwrite(GUEST_IDTR_LIMIT, c->idtr_limit);
@@ -860,7 +864,7 @@
 {
     struct vmx_assist_context c;
     u32 magic;
-    unsigned long cp;
+    u32 cp;
 
     /* make sure vmxassist exists (this is not an error) */
     if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
@@ -1191,7 +1195,7 @@
 
         __vmread(CR4_READ_SHADOW, &old_cr);
         if (pae_disabled)
-            __vmwrite(GUEST_CR4, ((value & ~X86_CR4_PAE) | X86_CR4_VMXE));
+            __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
         else
             __vmwrite(GUEST_CR4, value| X86_CR4_VMXE);
 
diff -r a29b4174d39c -r ba925b4aef28 xen/arch/x86/shadow32.c
--- a/xen/arch/x86/shadow32.c   Mon Jul 11 10:23:19 2005
+++ b/xen/arch/x86/shadow32.c   Mon Jul 11 14:39:10 2005
@@ -677,7 +677,7 @@
     return shadow_mode_refcounts(d);
 }
 
-void alloc_monitor_pagetable(struct vcpu *v)
+static void alloc_monitor_pagetable(struct vcpu *v)
 {
     unsigned long mmfn;
     l2_pgentry_t *mpl2e;
diff -r a29b4174d39c -r ba925b4aef28 xen/arch/x86/vmx_vmcs.c
--- a/xen/arch/x86/vmx_vmcs.c   Mon Jul 11 10:23:19 2005
+++ b/xen/arch/x86/vmx_vmcs.c   Mon Jul 11 14:39:10 2005
@@ -122,6 +122,7 @@
     struct e820entry *e820p;
     unsigned long gpfn = 0;
 
+    local_flush_tlb_pge();
     regs->ebx = 0;   /* Linux expects ebx to be 0 for boot proc */
 
     n = regs->ecx;
@@ -311,8 +312,7 @@
     error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
     /* CR3 is set in vmx_final_setup_guest */
 #ifdef __x86_64__
-    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PAE);
-    printk("construct_init_vmcs_guest: guest CR4 is %lx\n", host_env->cr4 );
+    error |= __vmwrite(GUEST_CR4, host_env->cr4 & ~X86_CR4_PSE);
 #else
     error |= __vmwrite(GUEST_CR4, host_env->cr4);
 #endif
diff -r a29b4174d39c -r ba925b4aef28 tools/python/xen/xend/image.py
--- a/tools/python/xen/xend/image.py    Mon Jul 11 10:23:19 2005
+++ b/tools/python/xen/xend/image.py    Mon Jul 11 14:39:10 2005
@@ -351,6 +351,8 @@
         @param mem_mb: size in MB
         @return size in KB
         """
-        # Logic x86-32 specific. 
         # 1 page for the PGD + 1 pte page for 4MB of memory (rounded)
-        return (1 + ((mem_mb + 3) >> 2)) * 4
+        if os.uname()[4] == 'x86_64':
+            return (5 + ((mem_mb + 1) >> 1)) * 4
+        else:
+            return (1 + ((mem_mb + 3) >> 2)) * 4

_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
